1185.11.1
by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet. |
1 |
# Copyright (C) 2005 by Canonical Development Ltd
|
2 |
||
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
||
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
||
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""
|
|
18 |
An implementation of the primary storage type CompressedTextStore.
|
|
19 |
||
20 |
This store keeps compressed versions of the full text. It does not
|
|
21 |
do any sort of delta compression.
|
|
22 |
"""
|
|
23 |
||
24 |
import os, tempfile, gzip |
|
25 |
||
26 |
import bzrlib.store |
|
27 |
from bzrlib.trace import mutter |
|
28 |
from bzrlib.errors import BzrError |
|
29 |
||
30 |
from StringIO import StringIO |
|
31 |
from stat import ST_SIZE |
|
32 |
||
1092.2.24
by Robert Collins
merge from martins newformat branch - brings in transport abstraction |
33 |
class CompressedTextStore(bzrlib.store.TransportStore):
    """Store that holds files indexed by unique names.

    Files can be added, but not modified once they are in. Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Files are stored gzip compressed, with no delta compression.

    >>> st = ScratchCompressedTextStore()

    >>> st.add(StringIO('hello'), 'aa')
    >>> 'aa' in st
    True
    >>> 'foo' in st
    False

    You are not allowed to add an id that is already present.

    Entries can be retrieved as files, which may then be read.

    >>> st.add(StringIO('goodbye'), '123123')
    >>> st['123123'].read()
    'goodbye'
    """

    def __init__(self, transport):
        """Create a store whose entries are kept on `transport`."""
        super(CompressedTextStore, self).__init__(transport)

    def _check_fileid(self, fileid):
        """Raise ValueError if `fileid` contains a slash or a backslash."""
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % (fileid,))

    def _relpath(self, fileid):
        """Return the transport-relative path for `fileid`.

        The id is validated with _check_fileid() and then suffixed
        with '.gz'.
        """
        self._check_fileid(fileid)
        return fileid + '.gz'

    def _gzip_reader(self, f):
        """Wrap the compressed file-like object `f` in a gzip reader.

        gzip.GzipFile.read() requires a tell() function, but some
        transports return objects that cannot seek, so those are
        buffered in a StringIO instead.
        """
        if not hasattr(f, 'tell'):
            from cStringIO import StringIO
            f = StringIO(f.read())
        return gzip.GzipFile(mode='rb', fileobj=f)

    def add(self, f, fileid):
        """Add contents of a file into the store.

        f -- An open file, or file-like object.  A plain string is
             also accepted and is stored as the content itself.
        fileid -- The id to store the content under.

        Raises ValueError if fileid contains a slash or backslash, and
        BzrError if the transport already has an entry for fileid.
        """
        # TODO: implement an add_multi which can do some of its
        #       own pipelining, and possibly take advantage of
        #       transport.put_multi(). The problem is that
        #       entries potentially need to be compressed as they
        #       are received, which implies translation, which
        #       means it isn't as straightforward as we would like.
        from cStringIO import StringIO
        from bzrlib.osutils import pumpfile

        mutter("add store entry %r" % (fileid,))
        if isinstance(f, basestring):
            f = StringIO(f)

        fn = self._relpath(fileid)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r"
                           % (self._transport.base, fileid))

        # The compressed bytes are collected in an in-memory buffer and
        # then handed to the transport in one put().
        sio = StringIO()
        gf = gzip.GzipFile(mode='wb', fileobj=sio)
        # Strings were wrapped in a StringIO above, so f is always
        # file-like at this point and can be pumped unconditionally.
        pumpfile(f, gf)
        gf.close()
        sio.seek(0)
        self._transport.put(fn, sio)

    def _do_copy(self, other, to_copy, pb, permit_failure=False):
        """Copy the entries `to_copy` from `other` into this store.

        When `other` is also a CompressedTextStore the stored files are
        copied transport-to-transport; otherwise the base class
        implementation is used.
        """
        if isinstance(other, CompressedTextStore):
            return self._copy_multi_text(other, to_copy, pb,
                                         permit_failure=permit_failure)
        return super(CompressedTextStore, self)._do_copy(
            other, to_copy, pb, permit_failure=permit_failure)

    def _copy_multi_text(self, other, to_copy, pb,
                         permit_failure=False):
        """Copy already-compressed entries from `other` to this store.

        Returns a (count, failed) tuple: the number reported by the
        transport's copy_to(), and the set of ids that were skipped
        because `other` does not have them (always empty unless
        permit_failure is true).
        """
        # Because of _transport, we can no longer assume
        # that they are on the same filesystem, we can, however
        # assume that we only need to copy the exact bytes,
        # we don't need to process the files.

        failed = set()
        if permit_failure:
            # Only attempt the ids that `other` actually holds; the
            # rest are reported back through `failed`.
            new_to_copy = set()
            for fileid, has in zip(to_copy, other.has(to_copy)):
                if has:
                    new_to_copy.add(fileid)
                else:
                    failed.add(fileid)
            to_copy = new_to_copy

        paths = [self._relpath(fileid) for fileid in to_copy]
        count = other._transport.copy_to(paths, self._transport, pb=pb)
        assert count == len(to_copy)
        return count, failed

    def __contains__(self, fileid):
        """Return whether the transport has an entry for `fileid`."""
        fn = self._relpath(fileid)
        return self._transport.has(fn)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :return: A generator or list returning True/False for each entry.
        """
        relpaths = (self._relpath(fid) for fid in fileids)
        return self._transport.has_multi(relpaths, pb=pb)

    def get(self, fileids, permit_failure=False, pb=None):
        """Return a set of files, one for each requested entry.

        This is a generator.  It yields one item per id in `fileids`,
        in order: a gzip reader for the entry, or None for an id that
        was skipped because permit_failure is true and the transport
        reports it as absent.

        TODO: Write some tests to make sure that permit_failure is
              handled correctly.

        TODO: What should the exception be for a missing file?
              KeyError, or NoSuchFile?
        """
        # This next code gets a bit hairy because it can allow
        # to not request a file which doesn't seem to exist.
        # Also, the same fileid may be requested twice, so we
        # can't just build up a map.
        rel_paths = [self._relpath(fid) for fid in fileids]

        if permit_failure:
            existing_paths = []
            is_requested = []
            for path, has in zip(rel_paths,
                                 self._transport.has_multi(rel_paths)):
                if has:
                    existing_paths.append(path)
                    is_requested.append(True)
                else:
                    is_requested.append(False)
        else:
            existing_paths = rel_paths
            is_requested = [True] * len(rel_paths)

        count = 0
        for f in self._transport.get_multi(existing_paths, pb=pb):
            assert count < len(is_requested)
            # Emit a None placeholder for every skipped id that comes
            # before the entry this file belongs to.
            while not is_requested[count]:
                yield None
                count += 1
            yield self._gzip_reader(f)
            count += 1

        # Placeholders for skipped ids after the last retrieved file.
        while count < len(is_requested):
            yield None
            count += 1

    def __iter__(self):
        """Yield the names listed by the transport, minus any '.gz' suffix."""
        # TODO: case-insensitive?
        for f in self._transport.list_dir('.'):
            if f.endswith('.gz'):
                yield f[:-3]
            else:
                yield f

    def __len__(self):
        """Return the number of names the transport lists in the store."""
        return len(list(self._transport.list_dir('.')))

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry.

        Raises KeyError if the transport cannot supply the entry.
        """
        fn = self._relpath(fileid)
        # This will throw if the file doesn't exist.  The transport's
        # specific exception type is not known here, so any ordinary
        # exception is reported as a missing key; KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        try:
            f = self._transport.get(fn)
        except Exception:
            raise KeyError('This store (%s) does not contain %s' % (self, fileid))

        return self._gzip_reader(f)

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content."""
        total = 0
        count = 0
        relpaths = [self._relpath(fid) for fid in self]
        for st in self._transport.stat_multi(relpaths):
            count += 1
            total += st[ST_SIZE]

        return count, total
|
244 |
||
1092.2.24
by Robert Collins
merge from martins newformat branch - brings in transport abstraction |
245 |
|
1185.11.1
by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet. |
246 |
class ScratchCompressedTextStore(CompressedTextStore):
    """Self-destructing test subclass of CompressedTextStore.

    The Store only exists for the lifetime of the Python object.
    Obviously you should not put anything precious in it.
    """

    def __init__(self):
        """Create a store on a transport for a fresh temporary directory."""
        # NOTE(review): implicit relative import (Python 2 semantics);
        # presumably this resolves to bzrlib.transport -- confirm before
        # converting it to an absolute import.
        from transport import transport
        t = transport(tempfile.mkdtemp())
        super(ScratchCompressedTextStore, self).__init__(t)

    def __del__(self):
        """Delete every stored file and remove the store directory."""
        # __del__ also runs when __init__ raised before a transport was
        # attached; in that case there is nothing to clean up.
        scratch_transport = getattr(self, '_transport', None)
        if scratch_transport is None:
            return
        scratch_transport.delete_multi(scratch_transport.list_dir('.'))
        os.rmdir(scratch_transport.base)
        mutter("%r destroyed" % self)
|
261 |