# Copyright (C) 2005 by Canonical Development Ltd

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""
An implementation of the primary storage type, CompressedTextStore.

This store keeps compressed versions of the full text. It does not
do any sort of delta compression.
"""

import os, tempfile, gzip

import bzrlib.store
from bzrlib.store import hash_prefix
from bzrlib.trace import mutter
from bzrlib.errors import BzrError, FileExists

from StringIO import StringIO
from stat import ST_SIZE

class CompressedTextStore(bzrlib.store.TransportStore):
    """Store that holds files indexed by unique names.

    Files can be added, but not modified once they are in. Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Files are stored gzip compressed, with no delta compression.

    >>> st = ScratchCompressedTextStore()

    >>> st.add(StringIO('hello'), 'aa')
    >>> 'aa' in st
    True
    >>> 'foo' in st
    False

    You are not allowed to add an id that is already present.

    Entries can be retrieved as files, which may then be read.

    >>> st.add(StringIO('goodbye'), '123123')
    >>> st['123123'].read()
    'goodbye'
    """

    def __init__(self, transport, prefixed=False):
        super(CompressedTextStore, self).__init__(transport)
        self._prefixed = prefixed

    def _check_fileid(self, fileid):
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % fileid)

    def _relpath(self, fileid):
        self._check_fileid(fileid)
        if self._prefixed:
            return hash_prefix(fileid) + fileid + ".gz"
        else:
            return fileid + ".gz"
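        # For illustration (comment only): _relpath('abc') maps to 'abc.gz' in
        # an unprefixed store; with prefixed=True, hash_prefix() supplies a
        # short hashed subdirectory prefix, so the entry lands under something
        # like '<prefix>/abc.gz' (the exact prefix depends on hash_prefix()).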

    def add(self, f, fileid):
        """Add contents of a file into the store.

        f -- An open file, or file-like object."""
        # TODO: implement an add_multi which can do some of its
        #       own pipelining, and possibly take advantage of
        #       transport.put_multi(). The problem is that
        #       entries potentially need to be compressed as they
        #       are received, which implies translation, which
        #       means it isn't as straightforward as we would like.
        from cStringIO import StringIO
        from bzrlib.osutils import pumpfile

        mutter("add store entry %r" % (fileid))
        if isinstance(f, basestring):
            f = StringIO(f)

        fn = self._relpath(fileid)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

        if self._prefixed:
            try:
                self._transport.mkdir(hash_prefix(fileid))
            except FileExists:
                pass

        sio = StringIO()
        gf = gzip.GzipFile(mode='wb', fileobj=sio)
        # if pumpfile handles files that don't fit in ram,
        # so will this function
        if isinstance(f, basestring):
            gf.write(f)
        else:
            pumpfile(f, gf)
        gf.close()
        sio.seek(0)
        self._transport.put(fn, sio)
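        # Illustrative usage (comment only; note that add() also accepts a
        # plain string, which is wrapped in a StringIO above):
        #   >>> st = ScratchCompressedTextStore()
        #   >>> st.add('hello', 'aa')
        #   >>> st['aa'].read()
        #   'hello'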

    def _do_copy(self, other, to_copy, pb, permit_failure=False):
        if isinstance(other, CompressedTextStore):
            return self._copy_multi_text(other, to_copy, pb,
                                         permit_failure=permit_failure)
        return super(CompressedTextStore, self)._do_copy(other, to_copy,
                pb, permit_failure=permit_failure)

    def _copy_multi_text(self, other, to_copy, pb,
                         permit_failure=False):
        # Because of _transport, we can no longer assume
        # that they are on the same filesystem. We can, however,
        # assume that we only need to copy the exact bytes;
        # we don't need to process the files.

        failed = set()
        if permit_failure:
            new_to_copy = set()
            for fileid, has in zip(to_copy, other.has(to_copy)):
                if has:
                    new_to_copy.add(fileid)
                else:
                    failed.add(fileid)
            to_copy = new_to_copy
            #mutter('_copy_multi_text copying %s, failed %s' % (to_copy, failed))

        paths = [self._relpath(fileid) for fileid in to_copy]
        count = other._transport.copy_to(paths, self._transport, pb=pb)
        assert count == len(to_copy)
        return count, failed

    def __contains__(self, fileid):
        """Return True if fileid is present in the store."""
        fn = self._relpath(fileid)
        return self._transport.has(fn)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :return: A generator or list returning True/False for each entry.
        """
        relpaths = (self._relpath(fid) for fid in fileids)
        return self._transport.has_multi(relpaths, pb=pb)

    def get(self, fileids, permit_failure=False, pb=None):
        """Return a set of files, one for each requested entry.

        TODO: Write some tests to make sure that permit_failure is
              handled correctly.

        TODO: What should the exception be for a missing file?
              KeyError, or NoSuchFile?
        """

        # This next code gets a bit hairy because it can allow us
        # to skip requesting files which don't seem to exist.
        # Also, the same fileid may be requested twice, so we
        # can't just build up a map.
        rel_paths = [self._relpath(fid) for fid in fileids]
        is_requested = []

        #mutter('CompressedTextStore.get(permit_failure=%s)' % permit_failure)
        if permit_failure:
            existing_paths = []
            for path, has in zip(rel_paths,
                                 self._transport.has_multi(rel_paths)):
                if has:
                    existing_paths.append(path)
                    is_requested.append(True)
                else:
                    is_requested.append(False)
            #mutter('Retrieving %s out of %s' % (existing_paths, rel_paths))
        else:
            #mutter('Retrieving all %s' % (rel_paths, ))
            existing_paths = rel_paths
            is_requested = [True for x in rel_paths]

        count = 0
        for f in self._transport.get_multi(existing_paths, pb=pb):
            assert count < len(is_requested)
            while not is_requested[count]:
                yield None
                count += 1
            if hasattr(f, 'tell'):
                yield gzip.GzipFile(mode='rb', fileobj=f)
            else:
                from cStringIO import StringIO
                sio = StringIO(f.read())
                yield gzip.GzipFile(mode='rb', fileobj=sio)
            count += 1

        while count < len(is_requested):
            yield None
            count += 1
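        # (When permit_failure is True, entries that are missing are yielded
        # as None so the results stay aligned with the order of the requested
        # fileids; callers should be prepared to receive None values.)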

    def __iter__(self):
        for relpath, st in self._iter_relpaths():
            if relpath.endswith(".gz"):
                yield os.path.basename(relpath)[:-3]
            else:
                yield os.path.basename(relpath)

    def __len__(self):
        return len(list(self._iter_relpaths()))

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry."""
        f = super(CompressedTextStore, self).__getitem__(fileid)
        # gzip.GzipFile.read() requires a tell() function
        # but some transports return objects that cannot seek
        # so buffer them in a StringIO instead
        if hasattr(f, 'tell'):
            return gzip.GzipFile(mode='rb', fileobj=f)
        else:
            from cStringIO import StringIO
            sio = StringIO(f.read())
            return gzip.GzipFile(mode='rb', fileobj=sio)

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content."""
        total = 0
        count = 0
        for relpath, st in self._iter_relpaths():
            count += 1
            total += st[ST_SIZE]

        return count, total


class ScratchCompressedTextStore(CompressedTextStore):
    """Self-destructing test subclass of CompressedTextStore.

    The Store only exists for the lifetime of the Python object.
    Obviously you should not put anything precious in it.
    """
    def __init__(self):
        from transport import transport
        t = transport(tempfile.mkdtemp())
        super(ScratchCompressedTextStore, self).__init__(t)

    def __del__(self):
        self._transport.delete_multi(self._transport.list_dir('.'))
        os.rmdir(self._transport.base)
        mutter("%r destroyed" % self)