~bzr-pqm/bzr/bzr.dev

3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
1
# Copyright (C) 2006, 2008 Canonical Ltd
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
16
17
"""A simple least-recently-used (LRU) cache."""
18
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
19
from bzrlib import (
20
    symbol_versioning,
21
    trace,
22
    )
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
23
24
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
25
class _LRUNode(object):
26
    """This maintains the linked-list which is the lru internals."""
27
28
    __slots__ = ('prev', 'next', 'key', 'value', 'cleanup', 'size')
29
30
    def __init__(self, key, value, cleanup=None):
31
        self.prev = None
32
        self.next = None
33
        self.key = key
34
        self.value = value
35
        self.cleanup = cleanup
36
        # TODO: We could compute this 'on-the-fly' like we used to, and remove
37
        #       one pointer from this object, we just need to decide if it
38
        #       actually costs us much of anything in normal usage
39
        self.size = None
40
41
    def __repr__(self):
42
        if self.next is None:
43
            next = None
44
        else:
45
            next = self.next.key
46
        if self.prev is None:
47
            prev = None
48
        else:
49
            prev = self.prev.key
50
        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
51
                                     next, prev)
52
53
    def run_cleanup(self):
54
        if self.cleanup is not None:
55
            self.cleanup(self.key, self.value)
56
        self.cleanup = None
57
        # Just make sure to break any refcycles, etc
58
        self.value = None
59
60
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
61
class LRUCache(object):
62
    """A class which manages a cache of entries, removing unused ones."""
63
3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
64
    def __init__(self, max_cache=100, after_cleanup_count=None,
65
                 after_cleanup_size=symbol_versioning.DEPRECATED_PARAMETER):
66
        if symbol_versioning.deprecated_passed(after_cleanup_size):
67
            symbol_versioning.warn('LRUCache.__init__(after_cleanup_size) was'
68
                                   ' deprecated in 1.11. Use'
69
                                   ' after_cleanup_count instead.',
70
                                   DeprecationWarning)
71
            after_cleanup_count = after_cleanup_size
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
72
        self._cache = {}
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
73
        # The "HEAD" of the lru linked list
74
        self._most_recently_used = None
75
        # The "TAIL" of the lru linked list
76
        self._last_recently_used = None
3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
77
        self._update_max_cache(max_cache, after_cleanup_count)
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
78
79
    def __contains__(self, key):
80
        return key in self._cache
81
82
    def __getitem__(self, key):
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
83
        node = self._cache[key]
4178.3.4 by John Arbash Meinel
Shave off approx 100ms by inlining _record_access into __getitem__,
84
        # Inlined from _record_access to decrease the overhead of __getitem__
85
        # We also have more knowledge about structure if __getitem__ is
86
        # succeeding, then we know that self._most_recently_used must not be
87
        # None, etc.
88
        mru = self._most_recently_used
89
        if node is mru:
90
            # Nothing to do, this node is already at the head of the queue
91
            return node.value
92
        elif node is self._last_recently_used:
93
            self._last_recently_used = node.prev
94
        # Remove this node from the old location
95
        node_prev = node.prev
96
        node_next = node.next
97
        if node_prev is not None:
98
            node_prev.next = node_next
99
        if node_next is not None:
100
            node_next.prev = node_prev
101
        # Insert this node to the front of the list
102
        node.next = mru
103
        mru.prev = node
104
        self._most_recently_used = node
105
        node.prev = None
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
106
        return node.value
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
107
108
    def __len__(self):
109
        return len(self._cache)
110
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
111
    def _walk_lru(self):
4178.3.6 by John Arbash Meinel
Remove the asserts, and change some to AssertionError.
112
        """Walk the LRU list, only meant to be used in tests."""
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
113
        node = self._most_recently_used
114
        if node is not None:
4178.3.6 by John Arbash Meinel
Remove the asserts, and change some to AssertionError.
115
            if node.prev is not None:
116
                raise AssertionError('the _most_recently_used entry is not'
117
                                     ' supposed to have a previous entry'
118
                                     ' %s' % (node,))
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
119
        while node is not None:
120
            if node.next is None:
4178.3.6 by John Arbash Meinel
Remove the asserts, and change some to AssertionError.
121
                if node is not self._last_recently_used:
122
                    raise AssertionError('only the last node should have'
123
                                         ' no next value: %s' % (node,))
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
124
            else:
4178.3.6 by John Arbash Meinel
Remove the asserts, and change some to AssertionError.
125
                if node.next.prev is not node:
126
                    raise AssertionError('inconsistency found, node.next.prev'
127
                                         ' != node: %s' % (node,))
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
128
            if node.prev is None:
4178.3.6 by John Arbash Meinel
Remove the asserts, and change some to AssertionError.
129
                if node is not self._most_recently_used:
130
                    raise AssertionError('only the _most_recently_used should'
131
                                         ' not have a previous node: %s'
132
                                         % (node,))
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
133
            else:
4178.3.6 by John Arbash Meinel
Remove the asserts, and change some to AssertionError.
134
                if node.prev.next is not node:
135
                    raise AssertionError('inconsistency found, node.prev.next'
136
                                         ' != node: %s' % (node,))
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
137
            yield node
138
            node = node.next
139
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
140
    def add(self, key, value, cleanup=None):
141
        """Add a new value to the cache.
142
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
143
        Also, if the entry is ever removed from the cache, call
144
        cleanup(key, value).
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
145
146
        :param key: The key to store it under
147
        :param value: The object to store
148
        :param cleanup: None or a function taking (key, value) to indicate
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
149
                        'value' should be cleaned up.
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
150
        """
151
        if key in self._cache:
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
152
            node = self._cache[key]
153
            node.run_cleanup()
154
            node.value = value
155
            node.cleanup = cleanup
156
        else:
157
            node = _LRUNode(key, value, cleanup=cleanup)
158
            self._cache[key] = node
159
        self._record_access(node)
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
160
161
        if len(self._cache) > self._max_cache:
162
            # Trigger the cleanup
163
            self.cleanup()
164
4178.3.1 by John Arbash Meinel
Implement LRUCache.cache_size(), so that it can trivially substitute for FIFOCache.
165
    def cache_size(self):
166
        """Get the number of entries we will cache."""
167
        return self._max_cache
168
2998.2.1 by John Arbash Meinel
Implement LRUCache.get() which acts like dict.get()
169
    def get(self, key, default=None):
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
170
        node = self._cache.get(key, None)
171
        if node is None:
172
            return default
4178.3.5 by John Arbash Meinel
Add tests that LRUCache.get() properly tracks accesses.
173
        self._record_access(node)
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
174
        return node.value
2998.2.1 by John Arbash Meinel
Implement LRUCache.get() which acts like dict.get()
175
3763.8.10 by John Arbash Meinel
Add a .keys() member to LRUCache and LRUSizeCache.
176
    def keys(self):
177
        """Get the list of keys currently cached.
178
179
        Note that values returned here may not be available by the time you
180
        request them later. This is simply meant as a peak into the current
181
        state.
182
183
        :return: An unordered list of keys that are currently cached.
184
        """
185
        return self._cache.keys()
186
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
187
    def items(self):
188
        """Get the key:value pairs as a dict."""
189
        return dict((k, n.value) for k, n in self._cache.iteritems())
190
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
191
    def cleanup(self):
192
        """Clear the cache until it shrinks to the requested size.
193
194
        This does not completely wipe the cache, just makes sure it is under
3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
195
        the after_cleanup_count.
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
196
        """
197
        # Make sure the cache is shrunk to the correct size
3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
198
        while len(self._cache) > self._after_cleanup_count:
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
199
            self._remove_lru()
200
201
    def __setitem__(self, key, value):
202
        """Add a value to the cache, there will be no cleanup function."""
203
        self.add(key, value, cleanup=None)
204
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
205
    def _record_access(self, node):
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
206
        """Record that key was accessed."""
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
207
        # Move 'node' to the front of the queue
208
        if self._most_recently_used is None:
209
            self._most_recently_used = node
210
            self._last_recently_used = node
211
            return
212
        elif node is self._most_recently_used:
213
            # Nothing to do, this node is already at the head of the queue
214
            return
215
        elif node is self._last_recently_used:
216
            self._last_recently_used = node.prev
217
        # We've taken care of the tail pointer, remove the node, and insert it
218
        # at the front
219
        # REMOVE
220
        if node.prev is not None:
221
            node.prev.next = node.next
222
        if node.next is not None:
223
            node.next.prev = node.prev
224
        # INSERT
225
        node.next = self._most_recently_used
226
        self._most_recently_used = node
227
        node.next.prev = node
228
        node.prev = None
229
230
    def _remove_node(self, node):
231
        if node is self._last_recently_used:
232
            self._last_recently_used = node.prev
4178.3.6 by John Arbash Meinel
Remove the asserts, and change some to AssertionError.
233
        self._cache.pop(node.key)
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
234
        # If we have removed all entries, remove the head pointer as well
235
        if self._last_recently_used is None:
236
            self._most_recently_used = None
237
        node.run_cleanup()
4294.1.1 by John Arbash Meinel
When removing a node from an LRUCache, properly remove it from the linked list.
238
        # Now remove this node from the linked list
239
        if node.prev is not None:
240
            node.prev.next = node.next
241
        if node.next is not None:
242
            node.next.prev = node.prev
243
        # And remove this node's pointers
244
        node.prev = None
245
        node.next = None
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
246
247
    def _remove_lru(self):
248
        """Remove one entry from the lru, and handle consequences.
249
250
        If there are no more references to the lru, then this entry should be
251
        removed from the cache.
252
        """
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
253
        self._remove_node(self._last_recently_used)
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
254
255
    def clear(self):
256
        """Clear out all of the cache."""
257
        # Clean up in LRU order
3735.34.3 by John Arbash Meinel
Cleanup, in preparation for merging to brisbane-core.
258
        while self._cache:
259
            self._remove_lru()
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
260
3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
261
    def resize(self, max_cache, after_cleanup_count=None):
262
        """Change the number of entries that will be cached."""
263
        self._update_max_cache(max_cache,
264
                               after_cleanup_count=after_cleanup_count)
265
266
    def _update_max_cache(self, max_cache, after_cleanup_count=None):
267
        self._max_cache = max_cache
268
        if after_cleanup_count is None:
269
            self._after_cleanup_count = self._max_cache * 8 / 10
270
        else:
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
271
            self._after_cleanup_count = min(after_cleanup_count,
272
                                            self._max_cache)
3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
273
        self.cleanup()
274
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
275
276
class LRUSizeCache(LRUCache):
277
    """An LRUCache that removes things based on the size of the values.
278
279
    This differs in that it doesn't care how many actual items there are,
280
    it just restricts the cache to be cleaned up after so much data is stored.
281
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
282
    The size of items added will be computed using compute_size(value), which
283
    defaults to len() if not supplied.
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
284
    """
285
286
    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
287
                 compute_size=None):
288
        """Create a new LRUSizeCache.
289
290
        :param max_size: The max number of bytes to store before we start
291
            clearing out entries.
292
        :param after_cleanup_size: After cleaning up, shrink everything to this
293
            size.
294
        :param compute_size: A function to compute the size of the values. We
295
            use a function here, so that you can pass 'len' if you are just
296
            using simple strings, or a more complex function if you are using
297
            something like a list of strings, or even a custom object.
298
            The function should take the form "compute_size(value) => integer".
299
            If not supplied, it defaults to 'len()'
300
        """
301
        self._value_size = 0
302
        self._compute_size = compute_size
303
        if compute_size is None:
304
            self._compute_size = len
3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
305
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
306
        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
307
308
    def add(self, key, value, cleanup=None):
309
        """Add a new value to the cache.
310
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
311
        Also, if the entry is ever removed from the cache, call
312
        cleanup(key, value).
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
313
314
        :param key: The key to store it under
315
        :param value: The object to store
316
        :param cleanup: None or a function taking (key, value) to indicate
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
317
                        'value' should be cleaned up.
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
318
        """
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
319
        node = self._cache.get(key, None)
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
320
        value_len = self._compute_size(value)
321
        if value_len >= self._after_cleanup_size:
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
322
            # The new value is 'too big to fit', as it would fill up/overflow
323
            # the cache all by itself
324
            trace.mutter('Adding the key %r to an LRUSizeCache failed.'
325
                         ' value %d is too big to fit in a the cache'
326
                         ' with size %d %d', key, value_len,
327
                         self._after_cleanup_size, self._max_size)
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
328
            if node is not None:
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
329
                # We won't be replacing the old node, so just remove it
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
330
                self._remove_node(node)
4178.3.7 by John Arbash Meinel
Review tweaks from Ian.
331
            if cleanup is not None:
332
                cleanup(key, value)
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
333
            return
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
334
        if node is None:
335
            node = _LRUNode(key, value, cleanup=cleanup)
336
            self._cache[key] = node
337
        else:
338
            self._value_size -= node.size
339
        node.size = value_len
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
340
        self._value_size += value_len
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
341
        self._record_access(node)
2993.1.1 by Robert Collins
* New module ``lru_cache`` providing a cache for use by tasks that need
342
343
        if self._value_size > self._max_size:
344
            # Time to cleanup
345
            self.cleanup()
346
347
    def cleanup(self):
348
        """Clear the cache until it shrinks to the requested size.
349
350
        This does not completely wipe the cache, just makes sure it is under
351
        the after_cleanup_size.
352
        """
353
        # Make sure the cache is shrunk to the correct size
354
        while self._value_size > self._after_cleanup_size:
355
            self._remove_lru()
356
4178.3.3 by John Arbash Meinel
LRUCache is now implemented with a dict to a linked list,
357
    def _remove_node(self, node):
358
        self._value_size -= node.size
359
        LRUCache._remove_node(self, node)
3882.3.1 by John Arbash Meinel
Add LRUCache.resize(), and change the init arguments for LRUCache.
360
361
    def resize(self, max_size, after_cleanup_size=None):
362
        """Change the number of bytes that will be cached."""
363
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
364
        max_cache = max(int(max_size/512), 1)
365
        self._update_max_cache(max_cache)
366
367
    def _update_max_size(self, max_size, after_cleanup_size=None):
368
        self._max_size = max_size
369
        if after_cleanup_size is None:
370
            self._after_cleanup_size = self._max_size * 8 / 10
371
        else:
372
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)