53
51
Entries can be retrieved as files, which may then be read.
55
53
>>> st.add(StringIO('goodbye'), '123123')
56
>>> st['123123'].read()
54
>>> st.get('123123').read()
60
def __init__(self, transport, prefixed=False):
    """Set up the store on top of the given transport.

    transport -- handed unchanged to the parent class constructor.
    prefixed -- flag remembered on the instance as ``_prefixed``.
    """
    parent = super(CompressedTextStore, self)
    parent.__init__(transport)
    # Assigned after the parent constructor has run, as in the original
    # statement order.
    self._prefixed = prefixed
64
def _check_fileid(self, fileid):
65
if '\\' in fileid or '/' in fileid:
66
raise ValueError("invalid store id %r" % fileid)
68
def _relpath(self, fileid):
69
self._check_fileid(fileid)
71
return hash_prefix(fileid) + fileid + ".gz"
75
def add(self, f, fileid):
76
"""Add contents of a file into the store.
78
f -- An open file, or file-like object."""
79
# TODO: implement an add_multi which can do some of its
80
# own pipelining, and possibly take advantage of
81
# transport.put_multi(). The problem is that
82
# entries potentially need to be compressed as they
83
# are received, which implies translation, which
84
# means it isn't as straightforward as we would like.
58
def _relpath(self, fileid, suffixes=None):
    """Return the parent class's relative path for fileid with 'gz' added.

    fileid -- the store id to map to a path.
    suffixes -- optional list of suffixes; 'gz' is appended after them
        and the combined list is passed to the parent class's _relpath.
        Omitting it (or passing None) is the same as passing an empty list.
    """
    # Use a None sentinel rather than a shared mutable [] default.
    if suffixes is None:
        suffixes = []
    # Concatenation builds a new list, so the caller's list is untouched.
    suffixes = suffixes + ['gz']
    return super(CompressedTextStore, self)._relpath(fileid, suffixes)
62
def _add(self, fn, f):
85
63
from cStringIO import StringIO
86
64
from bzrlib.osutils import pumpfile
88
mutter("add store entry %r" % (fileid))
89
66
if isinstance(f, basestring):
92
fn = self._relpath(fileid)
93
if self._transport.has(fn):
94
raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))
98
self._transport.mkdir(hash_prefix(fileid))
103
70
gf = gzip.GzipFile(mode='wb', fileobj=sio)
104
71
# if pumpfile handles files that don't fit in ram,
112
79
self._transport.put(fn, sio)
114
def _do_copy(self, other, to_copy, pb, permit_failure=False):
    """Copy the entries named in to_copy from the store ``other``.

    When other is also a CompressedTextStore the work is handed to
    _copy_multi_text; any other kind of store goes through the parent
    class's _do_copy. Whatever the chosen helper returns is returned.
    """
    if not isinstance(other, CompressedTextStore):
        parent = super(CompressedTextStore, self)
        return parent._do_copy(other, to_copy, pb,
                               permit_failure=permit_failure)
    return self._copy_multi_text(other, to_copy, pb,
                                 permit_failure=permit_failure)
121
def _copy_multi_text(self, other, to_copy, pb,
122
permit_failure=False):
123
# Because of _transport, we can no longer assume
124
# that they are on the same filesystem, we can, however
125
# assume that we only need to copy the exact bytes,
126
# we don't need to process the files.
131
for fileid, has in zip(to_copy, other.has(to_copy)):
133
new_to_copy.add(fileid)
136
to_copy = new_to_copy
137
#mutter('_copy_multi_text copying %s, failed %s' % (to_copy, failed))
139
paths = [self._relpath(fileid) for fileid in to_copy]
140
count = other._transport.copy_to(paths, self._transport, pb=pb)
141
assert count == len(to_copy)
144
def __contains__(self, fileid):
146
fn = self._relpath(fileid)
147
return self._transport.has(fn)
149
def has(self, fileids, pb=None):
150
"""Return True/False for each entry in fileids.
152
:param fileids: A List or generator yielding file ids.
153
:return: A generator or list returning True/False for each entry.
155
relpaths = (self._relpath(fid) for fid in fileids)
156
return self._transport.has_multi(relpaths, pb=pb)
158
def get(self, fileids, permit_failure=False, pb=None):
159
"""Return a set of files, one for each requested entry.
161
TODO: Write some tests to make sure that permit_failure is
164
TODO: What should the exception be for a missing file?
165
KeyError, or NoSuchFile?
168
# This next code gets a bit hairy because it may skip
169
# requesting a file which doesn't seem to exist.
170
# Also, the same fileid may be requested twice, so we
171
# can't just build up a map.
172
rel_paths = [self._relpath(fid) for fid in fileids]
175
#mutter('CompressedTextStore.get(permit_failure=%s)' % permit_failure)
178
for path, has in zip(rel_paths,
179
self._transport.has_multi(rel_paths)):
181
existing_paths.append(path)
182
is_requested.append(True)
184
is_requested.append(False)
185
#mutter('Retrieving %s out of %s' % (existing_paths, rel_paths))
81
def _copy_one(self, fileid, suffix, other, pb):
82
if not (isinstance(other, CompressedTextStore)
83
and other._prefixed == self._prefixed):
84
return super(CompressedTextStore, self)._copy_one(fileid, suffix,
86
if suffix is None or suffix == 'gz':
87
path = self._relpath(fileid)
187
#mutter('Retrieving all %s' % (rel_paths, ))
188
existing_paths = rel_paths
189
is_requested = [True for x in rel_paths]
192
for f in self._transport.get_multi(existing_paths, pb=pb):
193
assert count < len(is_requested)
194
while not is_requested[count]:
197
if hasattr(f, 'tell'):
198
yield gzip.GzipFile(mode='rb', fileobj=f)
200
from cStringIO import StringIO
201
sio = StringIO(f.read())
202
yield gzip.GzipFile(mode='rb', fileobj=sio)
205
while count < len(is_requested):
210
for relpath, st in self._iter_relpaths():
211
if relpath.endswith(".gz"):
212
yield os.path.basename(relpath)[:-3]
214
yield os.path.basename(relpath)
217
return len(list(self._iter_relpath()))
219
def __getitem__(self, fileid):
89
path = self._relpath(fileid, [suffix])
90
assert other._transport.copy_to([path], self._transport, pb=pb) == 1
92
def __init__(self, transport, prefixed=False):
    """Set up the store and register its 'gz' suffix.

    transport -- handed unchanged to the parent class constructor.
    prefixed -- handed unchanged to the parent class constructor.
    """
    parent = super(CompressedTextStore, self)
    parent.__init__(transport, prefixed)
    # Registered only after the parent constructor has run.
    self.register_suffix('gz')
96
def _get(self, filename):
220
97
"""Returns a file reading from a particular entry."""
221
f = super(CompressedTextStore, self).__getitem__(fileid)
98
f = self._transport.get(filename)
222
99
# gzip.GzipFile.read() requires a tell() function
223
100
# but some transports return objects that cannot seek
224
101
# so buffer them in a StringIO instead