48
"""This class represents the abstract storage layout for saving information.
38
class ImmutableStore(object):
39
"""Store that holds files indexed by unique names.
50
41
Files can be added, but not modified once they are in. Typically
51
42
the hash is used as the name, or something else known to be unique,
45
>>> st = ImmutableScratchStore()
47
>>> st.add(StringIO('hello'), 'aa')
53
You are not allowed to add an id that is already present.
55
Entries can be retrieved as files, which may then be read.
57
>>> st.add(StringIO('goodbye'), '123123')
58
>>> st['123123'].read()
61
TODO: Atomic add by writing to a temporary file and renaming.
63
In bzr 0.0.5 and earlier, files within the store were marked
64
readonly on disk. This is no longer done but existing stores need
56
raise NotImplementedError('Children should define their length')
58
def get(self, fileid, suffix=None):
    """Return a file reading from the entry stored under fileid.

    When suffix is supplied, the named suffix for fileid is retrieved.

    This base version is a placeholder: it always raises
    NotImplementedError.
    """
    raise NotImplementedError()
65
def __getitem__(self, fileid):
66
"""DEPRECATED. Please use .get(fileid) instead."""
67
raise NotImplementedError
69
#def __contains__(self, fileid):
70
# """Deprecated, please use has_id"""
71
# raise NotImplementedError
74
raise NotImplementedError
76
def add(self, f, fileid):
    """Add the file object f to the store, reachable through fileid.

    This version only signals that the operation is missing: it always
    raises NotImplementedError with an explanatory message.
    """
    message = 'Children of Store must define their method of adding entries.'
    raise NotImplementedError(message)
80
def has_id(self, fileid, suffix=None):
    """Report, as True or False, whether fileid is present in the store.

    suffix, when given, is a per-file suffix, such as one used for a
    digital signature.

    This version is a placeholder and always raises NotImplementedError.
    """
    raise NotImplementedError()
88
"""Return True if this store is able to be listed."""
89
return hasattr(self, "__iter__")
91
def copy_all_ids(self, store_from, pb=None):
92
"""Copy all the file ids from store_from into self."""
93
if not store_from.listable():
94
raise UnlistableStore(store_from)
96
for count, file_id in enumerate(store_from):
98
pb.update('listing files', count, count)
102
mutter('copy_all ids: %r', ids)
103
self.copy_multi(store_from, ids, pb=pb)
105
def copy_multi(self, other, ids, pb=None, permit_failure=False):
68
def __init__(self, basedir):
69
self._basedir = basedir
71
def _path(self, entry_id):
    """Return entry_id joined onto the store's base directory.

    Raises TypeError (carrying the offending type) when entry_id is not a
    string, and ValueError when it contains a backslash or a forward slash.
    """
    if isinstance(entry_id, basestring):
        # Ids containing a path separator are rejected outright.
        if any(separator in entry_id for separator in ('\\', '/')):
            raise ValueError("invalid store id %r" % entry_id)
        return os.path.join(self._basedir, entry_id)
    raise TypeError(type(entry_id))
79
return "%s(%r)" % (self.__class__.__name__, self._basedir)
81
def add(self, f, fileid, compressed=True):
82
"""Add contents of a file into the store.
84
f -- An open file, or file-like object."""
85
# FIXME: Only works on files that will fit in memory
87
from bzrlib.atomicfile import AtomicFile
89
mutter("add store entry %r" % (fileid))
90
if isinstance(f, types.StringTypes):
95
p = self._path(fileid)
96
if os.access(p, os.F_OK) or os.access(p + '.gz', os.F_OK):
97
raise BzrError("store %r already contains id %r" % (self._basedir, fileid))
103
af = AtomicFile(fn, 'wb')
106
gf = gzip.GzipFile(mode='wb', fileobj=af)
116
def copy_multi(self, other, ids, permit_failure=False):
106
117
"""Copy texts for ids from other into self.
108
If an id is present in self, it is skipped. A count of copied
109
ids is returned, which may be less than len(ids).
119
If an id is present in self, it is skipped.
111
:param other: Another Store object
112
:param ids: A list of entry ids to be copied
113
:param pb: A ProgressBar object, if none is given, the default will be created.
114
:param permit_failure: Allow missing entries to be ignored
115
:return: (n_copied, [failed]) The number of entries copied successfully,
116
followed by a list of entries which could not be copied (because they
121
Returns (count_copied, failed), where failed is a collection of ids
122
that could not be copied.
120
pb.update('preparing to copy')
124
pb = bzrlib.ui.ui_factory.progress_bar()
126
pb.update('preparing to copy')
127
to_copy = [id for id in ids if id not in self]
128
if isinstance(other, ImmutableStore):
129
return self.copy_multi_immutable(other, to_copy, pb,
130
permit_failure=permit_failure)
123
ids = list(ids) # get the list for showing a length.
126
if self.has_id(fileid):
135
pb.update('copy', count, len(to_copy))
136
if not permit_failure:
137
self.add(other[id], id)
146
if not permit_failure:
147
assert count == len(to_copy)
151
def copy_multi_immutable(self, other, to_copy, pb, permit_failure=False):
152
from shutil import copyfile
157
other_p = other._path(id)
129
self._copy_one(fileid, None, other, pb)
130
for suffix in self._suffixes:
132
self._copy_one(fileid, suffix, other, pb)
136
pb.update('copy', count, len(ids))
161
if e.errno == errno.ENOENT:
162
if not permit_failure:
163
copyfile(other_p+".gz", p+".gz")
166
copyfile(other_p+".gz", p+".gz")
168
if e.errno == errno.ENOENT:
142
assert count == len(ids)
176
pb.update('copy', count, len(to_copy))
177
assert count == len(to_copy)
145
179
return count, failed
147
def _copy_one(self, fileid, suffix, other, pb):
    """Copy one entry from other into self in the most generic way.

    The entry is read with other.get(fileid, suffix) and handed to
    self.add() under the same fileid and suffix.

    Subclasses can override this to provide an optimised copy between
    their own instances.  Such overridden routines should call this one
    when they have no optimised facility to use.

    pb is accepted for interface compatibility; this routine does not
    touch it.
    """
    mutter('Store._copy_one: %r', fileid)
    self.add(other.get(fileid, suffix), fileid, suffix)
160
class TransportStore(Store):
161
"""A TransportStore is a Store superclass for Stores that use Transports."""
163
def add(self, f, fileid, suffix=None):
    """Add contents of a file into the store.

    f -- A file-like object, or string
    fileid -- the id the contents are stored under
    suffix -- optional suffix, passed to _id_to_names() with fileid

    Raises BzrError when the transport already holds any of the names
    that fileid (and suffix) map to.
    """
    mutter("add store entry %r", fileid)

    candidates = self._id_to_names(fileid, suffix)
    already_present = self._transport.has_any(candidates)
    if already_present:
        raise BzrError("store %r already contains id %r"
                       % (self._transport.base, fileid))

    # The write itself is delegated to _add(), using the first candidate
    # name.  Per the original note: most of the time just adding the file
    # works; when it fails because the directory does not exist, the
    # directory is meant to be created and the add retried (that retry is
    # not performed in this method).
    self._add(candidates[0], f)
181
def _add(self, relpath, f):
182
"""Actually add the file to the given location.
183
This should be overridden by children.
185
raise NotImplementedError('children need to implement this function.')
187
def _check_fileid(self, fileid):
    """Validate fileid, returning None when it is acceptable.

    Raises TypeError when fileid is not a string type, and ValueError
    when it contains a backslash or a forward slash.
    """
    if not isinstance(fileid, basestring):
        details = (type(fileid), fileid)
        raise TypeError('Fileids should be a string type: %s %r' % details)
    contains_separator = '\\' in fileid or '/' in fileid
    if contains_separator:
        raise ValueError("invalid store id %r" % fileid)
193
def _id_to_names(self, fileid, suffix):
194
"""Return the names in the expected order"""
195
if suffix is not None:
196
fn = self._relpath(fileid, [suffix])
198
fn = self._relpath(fileid)
200
# FIXME RBC 20051128 this belongs in TextStore.
207
def has_id(self, fileid, suffix=None):
    """See Store.has_id.

    The names that fileid and suffix map to are computed by
    _id_to_names(), and the transport is asked whether it has any of them.
    """
    candidate_names = self._id_to_names(fileid, suffix)
    return self._transport.has_any(candidate_names)
211
def _get_name(self, fileid, suffix=None):
212
"""A special check, which returns the name of an existing file.
214
This is similar in spirit to 'has_id', but it is designed
215
to return information about which file the store has.
217
for name in self._id_to_names(fileid, suffix=suffix):
218
if self._transport.has(name):
222
def _get(self, filename):
223
"""Return an vanilla file stream for clients to read from.
225
This is the body of a template method on 'get', and should be
226
implemented by subclasses.
228
raise NotImplementedError
230
def get(self, fileid, suffix=None):
231
"""See Store.get()."""
232
names = self._id_to_names(fileid, suffix)
235
return self._get(name)
236
except errors.NoSuchFile:
182
def __contains__(self, fileid):
    """Tell whether fileid can be read from the store.

    The entry counts as present when either its plain path or the same
    path with '.gz' appended is readable.
    """
    plain_path = self._path(fileid)
    for candidate in (plain_path, plain_path + '.gz'):
        if os.access(candidate, os.R_OK):
            return True
    return False
188
# TODO: Guard against the same thing being stored twice, compressed and uncompressed
191
for f in os.listdir(self._basedir):
193
# TODO: case-insensitive?
199
return len(os.listdir(self._basedir))
202
def __getitem__(self, fileid):
203
"""Returns a file reading from a particular entry."""
204
p = self._path(fileid)
206
return gzip.GzipFile(p + '.gz', 'rb')
208
if e.errno != errno.ENOENT:
214
if e.errno != errno.ENOENT:
238
217
raise KeyError(fileid)
240
def __init__(self, a_transport, prefixed=False, compressed=False,
241
dir_mode=None, file_mode=None,
243
assert isinstance(a_transport, Transport)
244
super(TransportStore, self).__init__()
245
self._transport = a_transport
246
self._prefixed = prefixed
247
# FIXME RBC 20051128 this belongs in TextStore.
248
self._compressed = compressed
249
self._suffixes = set()
250
self._escaped = escaped
252
# It is okay for these to be None, it just means they
253
# will just use the filesystem defaults
254
self._dir_mode = dir_mode
255
self._file_mode = file_mode
257
def _unescape(self, file_id):
258
"""If filename escaping is enabled for this store, unescape and return the filename."""
260
return urllib.unquote(file_id)
264
def _iter_files_recursive(self):
    """Iterate through the files in the transport, yielding unquoted paths."""
    # The transport iterator always returns quoted paths, so every path is
    # unquoted before it is handed to the caller.
    for raw_relpath in self._transport.iter_files_recursive():
        plain_relpath = urllib.unquote(raw_relpath)
        yield plain_relpath
272
for relpath in self._iter_files_recursive():
273
# worst case is one of each suffix.
274
name = os.path.basename(relpath)
275
if name.endswith('.gz'):
278
for count in range(len(self._suffixes)):
279
for suffix in self._suffixes:
280
if name.endswith('.' + suffix):
283
yield self._unescape(name)
286
return len(list(self.__iter__()))
288
def _relpath(self, fileid, suffixes=None):
289
self._check_fileid(fileid)
291
for suffix in suffixes:
292
if not suffix in self._suffixes:
293
raise ValueError("Unregistered suffix %r" % suffix)
294
self._check_fileid(suffix)
298
# hash_prefix adds the '/' separator
299
prefix = self.hash_prefix(fileid)
302
fileid = self._escape_file_id(fileid)
303
path = prefix + fileid
304
full_path = u'.'.join([path] + suffixes)
305
return urlescape(full_path)
307
def _escape_file_id(self, file_id):
308
"""Turn a file id into a filesystem safe string.
310
This is similar to a plain urllib.quote, except
311
it uses specific safe characters, so that it doesn't
312
have to translate a lot of valid file ids.
314
if not self._escaped:
316
if isinstance(file_id, unicode):
317
file_id = file_id.encode('utf-8')
318
# @ does not get escaped. This is because it is a valid
319
# filesystem character we use all the time, and it looks
320
# a lot better than seeing %40 all the time.
321
safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
322
r = [((c in safe) and c or ('%%%02x' % ord(c)))
326
def hash_prefix(self, fileid):
327
# fileid should be unescaped
329
fileid = self._escape_file_id(fileid)
330
return "%02x/" % (adler32(fileid) & 0xff)
333
if self._transport is None:
334
return "%s(None)" % (self.__class__.__name__)
336
return "%s(%r)" % (self.__class__.__name__, self._transport.base)
341
"""Return True if this store is able to be listed."""
342
return self._transport.listable()
344
def register_suffix(self, suffix):
345
"""Register a suffix as being expected in this store."""
346
self._check_fileid(suffix)
348
raise ValueError('You cannot register the "gz" suffix.')
349
self._suffixes.add(suffix)
351
220
def total_size(self):
352
221
"""Return (count, bytes)