# Copyright (C) 2006 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""An in-memory cache that falls back to disk when necessary."""

import tempfile

# Sentinel that lets pop() tell "no default supplied" apart from an
# explicit default of None.
_PARAM_NOT_SUPPLIED = object()


class DiskBackedCache(object):
    """A dict-like object that caches using a disk object.

    This caches in memory until the amount cached exceeds the
    given amount. It will then either flush to disk, or just
    stop caching new requests, depending on how it was set up.

    (The use case is for loading remote or local information.
    If it is local, cache it in memory, but don't cache too
    much. For remote, cache in memory, but if size is too great
    start flushing to disk.)

    This doesn't implement 100% of the dict interface, but it
    implements most of it to be useful. This is also meant to
    be more used as a cache which is built up, and then cleared
    all at once. Rather than one which is continually updated.

    Values must be byte strings (``str`` on Python 2, ``bytes`` on
    Python 3) because they may be written to a binary temporary file.
    """

    # Default memory size is 10MB
    _default_max_size = 10 * 1024 * 1024

    def __init__(self, use_disk=True, flush_all=False, max_size=None,
                 allow_replace=True):
        """Initialize a new Cache object.

        :param use_disk: If False, will not cache requests
            on disk. After max_size has been reached, further
            requests will be ignored.
            If True, requests after max_size will be cached
            in a local temporary file.
        :param flush_all: Once max_size is reached, flush all the
            data to disk, rather than keeping the current cache
            in memory.
        :param max_size: The maximum amount of data to cache in RAM.
            This class measures the amount of data cached, not the
            size of keys, or any overhead.
            If None, will default to self._default_max_size
            Passing 0 disables all caching. Passing a negative value
            (e.g. -1) will cache everything in memory.
        :param allow_replace: If True, allow cache['foo'] = 'bar'
            to replace an existing 'foo' key. Otherwise a KeyError
            will be raised.
        """
        self._use_disk = use_disk
        self._flush_all = flush_all
        self._allow_replace = allow_replace
        if max_size is None:
            max_size = self._default_max_size
        self._max_size = max_size

        # Number of data bytes currently held by the cache, counting
        # both in-memory values and values spilled to disk.
        self._cur_size = 0

        # Mapping from key => (pos_in_file, data)
        # if pos_in_file is None, then data contains the actual string
        # else data is the length of the string in the file
        self._dict = {}

        # Temporary file holding spilled values; created on first use.
        self._disk_cache = None

    # These special functions just thunk into self._dict
    # but they must exist on the class for python to support len()
    # "if 'foo' in cache:", etc.
    def __len__(self):
        """Return the number of keys in the cache."""
        return len(self._dict)

    def __contains__(self, key):
        """Return True if key is present in the cache."""
        return key in self._dict

    def __iter__(self):
        """Iterate over the keys in the cache."""
        return iter(self._dict)

    # Functions that just look at the keys. They are spelled out as
    # methods (rather than bound from the dict) so that they exist
    # regardless of which of them the builtin dict provides.
    def iterkeys(self):
        """Iterate over the keys in the cache"""
        return iter(self._dict)

    def has_key(self, key):
        """Same as 'key in cache'"""
        return key in self._dict

    def keys(self):
        """Return a list of the keys in the cache"""
        return list(self._dict)

    def iteritems(self):
        """Return a generator that yields the contents of the cache"""
        for key, (pos_in_file, data_or_size) in self._dict.items():
            yield key, self._get_mem_or_disk(pos_in_file, data_or_size)

    def items(self):
        """Return the list of key, value pairs"""
        return list(self.iteritems())

    def itervalues(self):
        """Iterate over the values in the cache"""
        for pos_in_file, data_or_size in self._dict.values():
            yield self._get_mem_or_disk(pos_in_file, data_or_size)

    def values(self):
        """Return a list of the values in the dict"""
        return list(self.itervalues())

    # Start of custom functionality
    @staticmethod
    def _entry_size(pos_in_file, data_or_size):
        """Return the number of data bytes a stored entry accounts for."""
        if pos_in_file is None:
            # In memory: the entry holds the data itself.
            return len(data_or_size)
        # On disk: the entry holds the length of the data.
        return data_or_size

    def _get_mem_or_disk(self, pos_in_file, data_or_size):
        """Return the data, either directly or by reading the file."""
        if pos_in_file is None:
            return data_or_size
        self._disk_cache.seek(pos_in_file)
        return self._disk_cache.read(data_or_size)

    def __getitem__(self, key):
        """x.__getitem__(y) <==> x[y]"""
        return self._get_mem_or_disk(*self._dict[key])

    def get(self, key, val=None):
        """Same as dict.get()"""
        try:
            entry = self._dict[key]
        except KeyError:
            return val
        return self._get_mem_or_disk(*entry)

    def __delitem__(self, key):
        """Delete an item from the cache.

        This does not actually delete anything that was written
        to disk. That will be cleaned up when finished.

        :raises KeyError: if key is not in the cache.
        """
        pos_in_file, data_or_size = self._dict.pop(key)
        # No need to read the file if we are just removing
        self._cur_size -= self._entry_size(pos_in_file, data_or_size)

    def _remove(self, pos_in_file, data_or_size):
        """Decrement the current size information and return the data"""
        self._cur_size -= self._entry_size(pos_in_file, data_or_size)
        return self._get_mem_or_disk(pos_in_file, data_or_size)

    def pop(self, key, val=_PARAM_NOT_SUPPLIED):
        """Same as dict.pop()

        :raises KeyError: if key is missing and no default was given.
        """
        if val is not _PARAM_NOT_SUPPLIED and key not in self._dict:
            return val
        pos_in_file, data_or_size = self._dict.pop(key)
        return self._remove(pos_in_file, data_or_size)

    def popitem(self):
        """Same as dict.popitem()"""
        key, (pos_in_file, data_or_size) = self._dict.popitem()
        return key, self._remove(pos_in_file, data_or_size)

    def clear(self):
        """Remove everything from the cache.

        If a disk cache is used, it will be closed
        """
        self._dict.clear()
        if self._disk_cache is not None:
            self._disk_cache.close()
            self._disk_cache = None
        self._cur_size = 0

    def _add_to_disk(self, key, val):
        """Add the given value to the disk cache.

        This only writes the data and records its location; callers
        are responsible for updating the size accounting.

        :param key: The key to add the value under
        :param val: A string to add to the disk cache.
        """
        if self._disk_cache is None:
            # This creates a temporary file, but on Unix-like machines
            # it actually deletes the disk record, so that it cannot be
            # reached by other means.
            self._disk_cache = tempfile.TemporaryFile()
            if self._flush_all:
                pos = 0
                # Go through all the items and update them
                for old_key, (old_pos, data) in list(self._dict.items()):
                    if old_key == key:
                        # About to be overwritten below; don't bother
                        # writing the value being replaced.
                        continue
                    # The file was just created, so nothing can
                    # already live on disk.
                    assert old_pos is None
                    self._disk_cache.write(data)
                    self._dict[old_key] = (pos, len(data))
                    pos += len(data)

        # Seek to the end of the file
        self._disk_cache.seek(0, 2)
        pos = self._disk_cache.tell()
        size = len(val)
        self._disk_cache.write(val)
        self._dict[key] = (pos, size)

    def _add_new_item(self, key, val):
        """Add val to the cache under key, in memory or on disk.

        Any function which wants to add something to the cache
        should go through here. It will preserve the cache size
        and either add the item to disk, or to memory, or possibly
        ignore it.

        :raises TypeError: if val is not a byte string.
        :raises KeyError: if key already exists and replacing is
            disallowed.
        """
        if self._max_size == 0:
            return
        if not isinstance(val, bytes):
            raise TypeError('DiskBackedCache can only store strings, not %s'
                            % val.__class__.__name__)

        old_size = 0
        if key in self._dict:
            if not self._allow_replace:
                raise KeyError('Key %r already exists,'
                               ' and replace is disallowed'
                               % (key,))
            old_size = self._entry_size(*self._dict[key])

        new_size = self._cur_size - old_size + len(val)

        if self._max_size < 0 or new_size <= self._max_size:
            self._cur_size = new_size
            self._dict[key] = (None, val)
            return

        # This is too big to fit in memory
        # check if we put in disk
        if not self._use_disk:
            # The new value is not cached. Drop any value it was meant
            # to replace, so that readers never see stale data.
            if key in self._dict:
                del self._dict[key]
                self._cur_size -= old_size
            return

        self._add_to_disk(key, val)
        # Only account for the value once it has really been written.
        self._cur_size = new_size

    def __setitem__(self, key, val):
        """Add a new entry to the cache."""
        self._add_new_item(key, val)

    @property
    def cache_size(self):
        """The number of data bytes currently cached (memory and disk)."""
        return self._cur_size