class ImmutableStore(object):
    """Store that holds files indexed by unique names.

    Files can be added, but not modified once they are in.  Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    >>> st = ImmutableScratchStore()
    >>> st.add(StringIO('hello'), 'aa')

    You are not allowed to add an id that is already present.

    Entries can be retrieved as files, which may then be read.

    >>> st.add(StringIO('goodbye'), '123123')
    >>> st['123123'].read()
    'goodbye'

    TODO: Atomic add by writing to a temporary file and renaming.

    TODO: Perhaps automatically transform to/from XML in a method?
          Would just need to tell the constructor what class to use.

    TODO: Even within a simple disk store like this, we could
          gzip the files.  But since many are less than one disk
          block, that might not help a lot.
    """
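
    # Illustrative sketch only (the path and ids are assumptions, not part of
    # the original doctest): a typical round trip through an on-disk store.
    #
    #   >>> store = ImmutableStore('/tmp/example-store')
    #   >>> store.add(StringIO('some text'), 'file-id-1')
    #   >>> store['file-id-1'].read()
    #   'some text'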


class Store(object):
    """This class represents the abstract storage layout for saving information."""

    def __len__(self):
        raise NotImplementedError('Children should define their length')

    def get(self, fileid, suffix=None):
        """Returns a file reading from a particular entry.

        If suffix is present, retrieve the named suffix for fileid.
        """
        raise NotImplementedError

    def __getitem__(self, fileid):
        """DEPRECATED. Please use .get(fileid) instead."""
        raise NotImplementedError

    #def __contains__(self, fileid):
    #    """Deprecated, please use has_id"""
    #    raise NotImplementedError

    def __iter__(self):
        raise NotImplementedError

    def add(self, f, fileid):
        """Add a file object f to the store accessible from the given fileid."""
        raise NotImplementedError('Children of Store must define their method of adding entries.')

    def has_id(self, fileid, suffix=None):
        """Return True or False for the presence of fileid in the store.

        suffix, if present, is a per-file suffix, i.e. for digital signature
        data.
        """
        raise NotImplementedError

    def listable(self):
        """Return True if this store is able to be listed."""
        return hasattr(self, "__iter__")

    def copy_all_ids(self, store_from, pb=None):
        """Copy all the file ids from store_from into self."""
        if not store_from.listable():
            raise UnlistableStore(store_from)
        ids = []
        for count, file_id in enumerate(store_from):
            if pb:
                pb.update('listing files', count, count)
            ids.append(file_id)
        mutter('copy_all ids: %r', ids)
        self.copy_multi(store_from, ids, pb=pb)
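
    # Example sketch (the store classes named here are hypothetical): copying
    # every entry from one listable store into another.
    #
    #   source = SomeListableStore(...)   # must support iteration over ids
    #   target = SomeWritableStore(...)
    #   target.copy_all_ids(source)       # raises UnlistableStore otherwise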

    def __init__(self, basedir):
        """ImmutableStore constructor."""
        self._basedir = basedir

    def _path(self, id):
        return os.path.join(self._basedir, id)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._basedir)

    def add(self, f, fileid, compressed=True):
        """Add contents of a file into the store.

        f -- An open file, or file-like object."""
        # FIXME: Only works on smallish files
        # TODO: Can be optimized by copying at the same time as
        #       computing the sum.
        mutter("add store entry %r" % (fileid))
        if isinstance(f, types.StringTypes):
            content = f
        else:
            content = f.read()
        p = self._path(fileid)
        if os.access(p, os.F_OK) or os.access(p + '.gz', os.F_OK):
            bailout("store %r already contains id %r" % (self._basedir, fileid))
        if compressed:
            f = gzip.GzipFile(p + '.gz', 'wb')
            f.write(content)
            f.close()
            os.chmod(p + '.gz', 0444)
        else:
            f = file(p, 'wb')
            f.write(content)
            f.close()
            os.chmod(p, 0444)
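
    # Usage sketch (ids and paths are illustrative; the compressed flag is
    # part of the signature above):
    #
    #   store = ImmutableStore('/tmp/example-store')
    #   store.add('raw text', 'id-1')                    # stored as id-1.gz
    #   store.add('raw text', 'id-2', compressed=False)  # stored as id-2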

    def copy_multi(self, other, ids, pb=None, permit_failure=False):
"""Copy texts for ids from other into self.
108
113
If an id is present in self, it is skipped. A count of copied
109
114
ids is returned, which may be less than len(ids).
111
:param other: Another Store object
112
:param ids: A list of entry ids to be copied
113
:param pb: A ProgressBar object, if none is given, the default will be created.
114
:param permit_failure: Allow missing entries to be ignored
115
:return: (n_copied, [failed]) The number of entries copied successfully,
116
followed by a list of entries which could not be copied (because they
120
pb.update('preparing to copy')
123
ids = list(ids) # get the list for showing a length.
126
if self.has_id(fileid):
129
self._copy_one(fileid, None, other, pb)
130
for suffix in self._suffixes:
132
self._copy_one(fileid, suffix, other, pb)
136
pb.update('copy', count, len(ids))
142
assert count == len(ids)
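
    # Calling-pattern sketch (store objects and ids are hypothetical):
    #
    #   copied, failed = new_store.copy_multi(old_store, ['id-1', 'id-2'],
    #                                         permit_failure=True)
    #   # 'failed' holds the ids that were missing in old_store.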

    def _copy_one(self, fileid, suffix, other, pb):
        """Most generic copy-one object routine.

        Subclasses can override this to provide an optimised
        copy between their own instances.  Such overridden routines
        should call this if they have no optimised facility for a
        specific 'other'.
        """
        mutter('Store._copy_one: %r', fileid)
        f = other.get(fileid, suffix)
        self.add(f, fileid, suffix)


class TransportStore(Store):
    """A TransportStore is a Store superclass for Stores that use Transports."""

    def add(self, f, fileid, suffix=None):
        """Add contents of a file into the store.

        f -- A file-like object, or string
        """
        mutter("add store entry %r", fileid)
        names = self._id_to_names(fileid, suffix)
        if self._transport.has_any(names):
            raise BzrError("store %r already contains id %r"
                           % (self._transport.base, fileid))

        # Most of the time, just adding the file will work
        # if we find a time where it fails (because the dir
        # doesn't exist), then create the dir, and try again
        self._add(names[0], f)
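
    # Illustrative only (the concrete subclass name and transport path are
    # assumptions):
    #
    #   store = SomeTransportStore(get_transport('/tmp/example'))
    #   store.add(StringIO('payload'), 'file-id')
    #   store.add(StringIO('sig bytes'), 'file-id', suffix='sig')  # needs register_suffix('sig')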

    def _add(self, relpath, f):
        """Actually add the file to the given location.

        This should be overridden by children.
        """
        raise NotImplementedError('children need to implement this function.')

    def _check_fileid(self, fileid):
        if not isinstance(fileid, basestring):
            raise TypeError('Fileids should be a string type: %s %r' % (type(fileid), fileid))
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % fileid)
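
    # Behaviour sketch (values are illustrative):
    #
    #   store._check_fileid('good-id')   # passes silently
    #   store._check_fileid('bad/id')    # raises ValueError
    #   store._check_fileid(123)         # raises TypeError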

    def _id_to_names(self, fileid, suffix):
        """Return the names in the expected order."""
        if suffix is not None:
            fn = self._relpath(fileid, [suffix])
        else:
            fn = self._relpath(fileid)

        # FIXME RBC 20051128 this belongs in TextStore.
        fn_gz = fn + '.gz'
        if self._compressed:
            return fn_gz, fn
        else:
            return fn, fn_gz

    def has_id(self, fileid, suffix=None):
        """See Store.has_id."""
        return self._transport.has_any(self._id_to_names(fileid, suffix))

    def _get_name(self, fileid, suffix=None):
        """A special check, which returns the name of an existing file.

        This is similar in spirit to 'has_id', but it is designed
        to return information about which file the store has.
        """
        for name in self._id_to_names(fileid, suffix=suffix):
            if self._transport.has(name):
                return name
        return None

    def _get(self, filename):
        """Return a vanilla file stream for clients to read from.

        This is the body of a template method on 'get', and should be
        implemented by subclasses.
        """
        raise NotImplementedError

    def get(self, fileid, suffix=None):
        """See Store.get()."""
        names = self._id_to_names(fileid, suffix)
        for name in names:
            try:
                return self._get(name)
            except errors.NoSuchFile:
                pass
        raise KeyError(fileid)
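
    # Retrieval sketch (ids are hypothetical):
    #
    #   f = store.get('file-id')            # returns a readable file object
    #   sig = store.get('file-id', 'sig')   # named-suffix variant
    #   store.get('missing-id')             # raises KeyError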

    def __init__(self, a_transport, prefixed=False, compressed=False,
                 dir_mode=None, file_mode=None,
                 escaped=False):
        assert isinstance(a_transport, Transport)
        super(TransportStore, self).__init__()
        self._transport = a_transport
        self._prefixed = prefixed
        # FIXME RBC 20051128 this belongs in TextStore.
        self._compressed = compressed
        self._suffixes = set()
        self._escaped = escaped

        # It is okay for these to be None; it just means they
        # will use the filesystem defaults
        self._dir_mode = dir_mode
        self._file_mode = file_mode

    def _unescape(self, file_id):
        """If filename escaping is enabled for this store, unescape and return the filename."""
        if self._escaped:
            return urllib.unquote(file_id)
        else:
            return file_id

    def _iter_files_recursive(self):
        """Iterate through the files in the transport."""
        for quoted_relpath in self._transport.iter_files_recursive():
            # transport iterator always returns quoted paths, regardless of
            # escaping
            yield urllib.unquote(quoted_relpath)

    def __contains__(self, fileid):
        p = self._path(fileid)
        return (os.access(p, os.R_OK)
                or os.access(p + '.gz', os.R_OK))

    # TODO: Guard against the same thing being stored twice, compressed and
    # uncompressed

    def __iter__(self):
        for relpath in self._iter_files_recursive():
            # worst case is one of each suffix.
            name = os.path.basename(relpath)
            if name.endswith('.gz'):
                name = name[:-3]
            skip = False
            for count in range(len(self._suffixes)):
                for suffix in self._suffixes:
                    if name.endswith('.' + suffix):
                        skip = True
            if not skip:
                yield self._unescape(name)

    def __len__(self):
        return len(list(self.__iter__()))

    def _relpath(self, fileid, suffixes=None):
        self._check_fileid(fileid)
        if suffixes:
            for suffix in suffixes:
                if not suffix in self._suffixes:
                    raise ValueError("Unregistered suffix %r" % suffix)
                self._check_fileid(suffix)
        else:
            suffixes = []
        if self._prefixed:
            # hash_prefix adds the '/' separator
            prefix = self.hash_prefix(fileid)
        else:
            prefix = ''
        fileid = self._escape_file_id(fileid)
        path = prefix + fileid
        full_path = u'.'.join([path] + suffixes)
        return urlescape(full_path)
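
    # Path-shape sketch (ids and outputs are illustrative, derived from the
    # logic above rather than verified against real stores):
    #
    #   store._relpath('some-id')           -> 'some-id'       (unprefixed)
    #   store._relpath('some-id', ['sig'])  -> 'some-id.sig'   (suffix must be registered)
    #   # with prefixed=True the result gains a two-hex-digit directory,
    #   # e.g. 'ab/some-id'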

    def _escape_file_id(self, file_id):
        """Turn a file id into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        if not self._escaped:
            return file_id
        if isinstance(file_id, unicode):
            file_id = file_id.encode('utf-8')
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
        r = [((c in safe) and c or ('%%%02x' % ord(c)))
             for c in file_id]
        return ''.join(r)
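
    # Escaping sketch (assumes an escaped store; outputs follow the rule
    # above and are illustrative):
    #
    #   store._escape_file_id('readme@rev-1')   -> 'readme@rev-1'  (all safe)
    #   store._escape_file_id('has space')      -> 'has%20space'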

    def hash_prefix(self, fileid):
        # fileid should be unescaped
        if self._escaped:
            fileid = self._escape_file_id(fileid)
        return "%02x/" % (adler32(fileid) & 0xff)

    def __repr__(self):
        if self._transport is None:
            return "%s(None)" % (self.__class__.__name__)
        else:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)

    def listable(self):
        """Return True if this store is able to be listed."""
        return self._transport.listable()

    def register_suffix(self, suffix):
        """Register a suffix as being expected in this store."""
        self._check_fileid(suffix)
        if suffix == 'gz':
            raise ValueError('You cannot register the "gz" suffix.')
        self._suffixes.add(suffix)
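
    # Typical use (the 'sig' suffix name is only an illustration):
    #
    #   store.register_suffix('sig')
    #   store.add(StringIO('signature bytes'), 'file-id', suffix='sig')
    #   store.register_suffix('gz')   # raises ValueError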

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry."""
        p = self._path(fileid)
        try:
            return gzip.GzipFile(p + '.gz', 'rb')
        except IOError, e:
            if e.errno == errno.ENOENT:
                return file(p, 'rb')
            else:
                raise

    def total_size(self):
        """Return (count, bytes)