55
"""This class represents the abstract storage layout for saving information.
38
"""Store that holds files indexed by unique names.
57
40
Files can be added, but not modified once they are in. Typically
58
41
the hash is used as the name, or something else known to be unique,
44
>>> st = ImmutableScratchStore()
46
>>> st.add(StringIO('hello'), 'aa')
52
You are not allowed to add an id that is already present.
54
Entries can be retrieved as files, which may then be read.
56
>>> st.add(StringIO('goodbye'), '123123')
57
>>> st['123123'].read()
60
TODO: Atomic add by writing to a temporary file and renaming.
62
TODO: Perhaps automatically transform to/from XML in a method?
63
Would just need to tell the constructor what class to
66
TODO: Even within a simple disk store like this, we could
67
gzip the files. But since many are less than one disk
68
block, that might not help a lot.
72
def __init__(self, basedir):
    """Create a store rooted at the given base directory.

    basedir -- Directory under which this store keeps its entries. The
               value is only remembered here; nothing is created or
               checked on disk by the constructor.
    """
    self._basedir = basedir
78
return os.path.join(self._basedir, id)
81
return "%s(%r)" % (self.__class__.__name__, self._basedir)
83
def add(self, f, fileid, compressed=True):
84
"""Add contents of a file into the store.
86
f -- An open file, or file-like object."""
87
# FIXME: Only works on smallish files
88
# TODO: Can be optimized by copying at the same time as
90
mutter("add store entry %r" % (fileid))
91
if isinstance(f, types.StringTypes):
96
p = self._path(fileid)
97
if os.access(p, os.F_OK) or os.access(p + '.gz', os.F_OK):
98
bailout("store %r already contains id %r" % (self._basedir, fileid))
101
f = gzip.GzipFile(p + '.gz', 'wb')
102
os.chmod(p + '.gz', 0444)
111
def __contains__(self, fileid):
113
p = self._path(fileid)
114
return (os.access(p, os.R_OK)
115
or os.access(p + '.gz', os.R_OK))
117
# TODO: Guard against the same thing being stored twice, compressed and uncompressed
120
for f in os.listdir(self._basedir):
122
# TODO: case-insensitive?
62
127
def __len__(self):
    """Return the number of entries in the store.

    The base implementation has no notion of length and always raises
    NotImplementedError; concrete stores must provide their own.
    """
    message = 'Children should define their length'
    raise NotImplementedError(message)
65
def get(self, fileid, suffix=None):
66
"""Returns a file reading from a particular entry.
68
If suffix is present, retrieve the named suffix for fileid.
70
raise NotImplementedError
128
return len(os.listdir(self._basedir))
72
130
def __getitem__(self, fileid):
    """DEPRECATED. Please use .get(fileid) instead.

    The base class offers no item access of its own and unconditionally
    raises NotImplementedError.
    """
    raise NotImplementedError
76
#def __contains__(self, fileid):
77
# """Deprecated, please use has_id"""
78
# raise NotImplementedError
81
raise NotImplementedError
83
def add(self, f, fileid):
    """Add a file object f to the store, reachable through fileid.

    This base version is a placeholder: it always raises
    NotImplementedError, so every concrete store has to supply its own
    way of adding entries.
    """
    reason = 'Children of Store must define their method of adding entries.'
    raise NotImplementedError(reason)
87
def has_id(self, fileid, suffix=None):
88
"""Return True or False for the presence of fileid in the store.
90
suffix, if present, is a per file suffix, i.e. for digital signature
92
raise NotImplementedError
95
"""Return True if this store is able to be listed."""
96
return (getattr(self, "__iter__", None) is not None)
98
def copy_all_ids(self, store_from, pb=None):
99
"""Copy all the file ids from store_from into self."""
100
if not store_from.listable():
101
raise UnlistableStore(store_from)
103
for count, file_id in enumerate(store_from):
105
pb.update('listing files', count, count)
109
mutter('copy_all ids: %r', ids)
110
self.copy_multi(store_from, ids, pb=pb)
112
def copy_multi(self, other, ids, pb=None, permit_failure=False):
113
"""Copy texts for ids from other into self.
115
If an id is present in self, it is skipped. A count of copied
116
ids is returned, which may be less than len(ids).
118
:param other: Another Store object
119
:param ids: A list of entry ids to be copied
120
:param pb: A ProgressBar object, if none is given, the default will be created.
121
:param permit_failure: Allow missing entries to be ignored
122
:return: (n_copied, [failed]) The number of entries copied successfully,
123
followed by a list of entries which could not be copied (because they
127
pb.update('preparing to copy')
132
if self.has_id(fileid):
135
self._copy_one(fileid, None, other, pb)
136
for suffix in self._suffixes:
138
self._copy_one(fileid, suffix, other, pb)
142
pb.update('copy', count, len(ids))
148
assert count == len(ids)
153
def _copy_one(self, fileid, suffix, other, pb):
154
"""Most generic copy-one object routine.
156
Subclasses can override this to provide an optimised
157
copy between their own instances. Such overridden routines
158
should call this if they have no optimised facility for a
161
mutter('Store._copy_one: %r', fileid)
162
f = other.get(fileid, suffix)
163
self.add(f, fileid, suffix)
166
class TransportStore(Store):
167
"""A TransportStore is a Store superclass for Stores that use Transports."""
169
def add(self, f, fileid, suffix=None):
    """Store the contents of a file-like object under fileid.

    f -- A file-like object; it is handed unchanged to self._add().
    fileid -- Identifier for the new entry.
    suffix -- Optional suffix, passed together with fileid to
              self._id_to_names() to work out the candidate names.

    Raises BzrError when the transport already holds any of the
    candidate names for this id.
    """
    mutter("add store entry %r", fileid)
    candidates = self._id_to_names(fileid, suffix)
    already_present = self._transport.has_any(candidates)
    if not already_present:
        # Only the first candidate name is written.  Most of the time
        # simply adding the file works; when it fails because the
        # directory does not exist, the directory is created and the
        # add is tried again.
        self._add(candidates[0], f)
        return
    raise BzrError("store %r already contains id %r"
                   % (self._transport.base, fileid))
185
def _add(self, relpath, f):
    """Write f to the given relative location.

    This is the hook that add() calls with the chosen name.  Children
    are expected to override it; the base version only raises
    NotImplementedError.
    """
    reason = 'children need to implement this function.'
    raise NotImplementedError(reason)
191
def _check_fileid(self, fileid):
    """Validate that fileid is a string containing no path separators.

    Raises TypeError when fileid is not a string type, and ValueError
    when it contains a backslash or a forward slash.
    """
    if not isinstance(fileid, basestring):
        details = (type(fileid), fileid)
        raise TypeError('Fileids should be a string type: %s %r' % details)
    # Either separator makes the id unusable as a single path component.
    for separator in ('\\', '/'):
        if separator in fileid:
            raise ValueError("invalid store id %r" % fileid)
197
def _id_to_names(self, fileid, suffix):
198
"""Return the names in the expected order"""
199
if suffix is not None:
200
fn = self._relpath(fileid, [suffix])
202
fn = self._relpath(fileid)
204
# FIXME RBC 20051128 this belongs in TextStore.
211
def has_id(self, fileid, suffix=None):
    """See Store.has_id.

    The id (and optional suffix) is expanded into its candidate names
    and the transport is asked whether any one of them exists.
    """
    transport = self._transport
    candidates = self._id_to_names(fileid, suffix)
    return transport.has_any(candidates)
215
def _get_name(self, fileid, suffix=None):
216
"""A special check, which returns the name of an existing file.
218
This is similar in spirit to 'has_id', but it is designed
219
to return information about which file the store has.
221
for name in self._id_to_names(fileid, suffix=suffix):
222
if self._transport.has(name):
226
def _get(self, filename):
    """Return a vanilla file stream for clients to read from.

    This is the body of the template method 'get' and is meant to be
    supplied by subclasses; the base version always raises
    NotImplementedError.
    """
    raise NotImplementedError
234
def get(self, fileid, suffix=None):
235
"""See Store.get()."""
236
names = self._id_to_names(fileid, suffix)
239
return self._get(name)
240
except errors.NoSuchFile:
242
raise KeyError(fileid)
244
def __init__(self, a_transport, prefixed=False, compressed=False,
245
dir_mode=None, file_mode=None,
247
assert isinstance(a_transport, Transport)
248
super(TransportStore, self).__init__()
249
self._transport = a_transport
250
self._prefixed = prefixed
251
# FIXME RBC 20051128 this belongs in TextStore.
252
self._compressed = compressed
253
self._suffixes = set()
254
self._escaped = escaped
256
# It is okay for these to be None, it just means they
257
# will just use the filesystem defaults
258
self._dir_mode = dir_mode
259
self._file_mode = file_mode
261
def _unescape(self, file_id):
262
"""If filename escaping is enabled for this store, unescape and return the filename."""
264
return urllib.unquote(file_id)
268
def _iter_files_recursive(self):
    """Yield the unquoted relative path of every file in the transport."""
    for quoted in self._transport.iter_files_recursive():
        # The transport iterator always hands back quoted paths, so
        # each one is unquoted before being passed on.
        yield urllib.unquote(quoted)
276
for relpath in self._iter_files_recursive():
277
# worst case is one of each suffix.
278
name = os.path.basename(relpath)
279
if name.endswith('.gz'):
282
for count in range(len(self._suffixes)):
283
for suffix in self._suffixes:
284
if name.endswith('.' + suffix):
287
yield self._unescape(name)
290
return len(list(self.__iter__()))
292
def _relpath(self, fileid, suffixes=None):
293
self._check_fileid(fileid)
295
for suffix in suffixes:
296
if not suffix in self._suffixes:
297
raise ValueError("Unregistered suffix %r" % suffix)
298
self._check_fileid(suffix)
301
fileid = self._escape_file_id(fileid)
303
# hash_prefix adds the '/' separator
304
prefix = self.hash_prefix(fileid, escaped=True)
307
path = prefix + fileid
308
full_path = u'.'.join([path] + suffixes)
309
return urlutils.escape(full_path)
311
def _escape_file_id(self, file_id):
312
"""Turn a file id into a filesystem safe string.
314
This is similar to a plain urllib.quote, except
315
it uses specific safe characters, so that it doesn't
316
have to translate a lot of valid file ids.
318
if not self._escaped:
320
if isinstance(file_id, unicode):
321
file_id = file_id.encode('utf-8')
322
# @ does not get escaped. This is because it is a valid
323
# filesystem character we use all the time, and it looks
324
# a lot better than seeing %40 all the time.
325
safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
326
r = [((c in safe) and c or ('%%%02x' % ord(c)))
330
def hash_prefix(self, fileid, escaped=False):
    """Return the two-hex-digit hash prefix for fileid, ending in '/'.

    fileid -- The id to hash.  It should be unescaped unless 'escaped'
              is true.
    escaped -- True when fileid has already been escaped by the caller,
               in which case it is hashed as given.

    When the store uses escaping and the id has not been escaped yet,
    the id is escaped first, so the hash is taken over the escaped
    form.  The prefix is the low byte of the adler32 checksum.
    """
    name = fileid
    if not escaped and self._escaped:
        name = self._escape_file_id(fileid)
    bucket = adler32(name) & 0xff
    return "%02x/" % bucket
337
if self._transport is None:
338
return "%s(None)" % (self.__class__.__name__)
340
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
345
"""Return True if this store is able to be listed."""
346
return self._transport.listable()
348
def register_suffix(self, suffix):
349
"""Register a suffix as being expected in this store."""
350
self._check_fileid(suffix)
352
raise ValueError('You cannot register the "gz" suffix.')
353
self._suffixes.add(suffix)
131
"""Returns a file reading from a particular entry."""
132
p = self._path(fileid)
134
return gzip.GzipFile(p + '.gz', 'rb')
136
if e.errno == errno.ENOENT:
355
141
def total_size(self):
356
142
"""Return (count, bytes)