711
by Martin Pool
- store docs |
1 |
# Copyright (C) 2005 by Canonical Development Ltd
|
1
by mbp at sourcefrog
import from baz patch-364 |
2 |
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
||
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
||
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
711
by Martin Pool
- store docs |
17 |
"""
|
18 |
Stores are the main data-storage mechanism for Bazaar-NG.
|
|
1
by mbp at sourcefrog
import from baz patch-364 |
19 |
|
20 |
A store is a simple write-once container indexed by a universally
|
|
711
by Martin Pool
- store docs |
21 |
unique ID.
|
22 |
"""
|
|
1
by mbp at sourcefrog
import from baz patch-364 |
23 |
|
907.1.1
by John Arbash Meinel
Reworking the Branch and Store code to support an abstracted filesystem layer. |
24 |
import os, tempfile, osutils, gzip, errno |
81
by mbp at sourcefrog
show space usage for various stores in the info command |
25 |
from stat import ST_SIZE |
1
by mbp at sourcefrog
import from baz patch-364 |
26 |
from StringIO import StringIO |
27 |
from trace import mutter |
|
28 |
||
29 |
######################################################################
|
|
30 |
# stores
|
|
31 |
||
32 |
class StoreError(Exception):
    """Base class for errors raised by the store classes in this module."""
|
|
34 |
||
907.1.1
by John Arbash Meinel
Reworking the Branch and Store code to support an abstracted filesystem layer. |
35 |
class Storage(object):
    """This class represents the abstract storage layout for saving information.

    Subclasses must implement __len__, __getitem__, __contains__, __iter__
    and add().  The multi-entry helpers defined here (add_multi, has, get,
    copy_multi) are written purely in terms of those primitives, so a
    subclass only needs to override them when it can do better.
    """

    # Transport used for all I/O; remains None until __init__ has run.
    _transport = None
    # Read-ahead threshold used by _do_copy(): once more than this many
    # entries are queued, the oldest queued entry is released.
    _max_buffered_requests = 10

    def __init__(self, transport):
        """Create a store that does its I/O through ``transport``.

        :param transport: A Transport instance.
        """
        from transport import Transport
        assert isinstance(transport, Transport)
        self._transport = transport

    def __repr__(self):
        """Return ``ClassName(base)``, or ``ClassName(None)`` without a transport."""
        if self._transport is None:
            return "%s(None)" % (self.__class__.__name__)
        else:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)

    __str__ = __repr__

    def __len__(self):
        """Return the number of entries; must be provided by subclasses."""
        raise NotImplementedError('Children should define their length')

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry."""
        raise NotImplementedError

    def __contains__(self, fileid):
        """Return True if ``fileid`` is present; must be provided by subclasses."""
        raise NotImplementedError

    def __iter__(self):
        """Iterate over the stored entries; must be provided by subclasses."""
        raise NotImplementedError

    def add(self, fileid, f):
        """Add a file object f to the store accessible from the given fileid"""
        raise NotImplementedError('Children of Storage must define their method of adding entries.')

    def add_multi(self, entries):
        """Add a series of file-like or string objects to the store with the given
        identities.

        :param entries: A list of tuples of id,file pairs [(id1, file1), (id2, file2), ...]
                        This could also be a generator yielding (id,file) pairs.
        """
        for fileid, f in entries:
            self.add(fileid, f)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :param pb: Optional progress bar.  Unused by this generic
                   implementation; accepted so the signature matches get()
                   and the CompressedTextStore.has() override.
        :return: A generator or list returning True/False for each entry.
        """
        for fileid in fileids:
            if fileid in self:
                yield True
            else:
                yield False

    def get(self, fileids, pb=None):
        """Return a set of files, one for each requested entry.

        :param fileids: A list or generator yielding file ids.
        :param pb: Optional progress bar; unused by this generic
                   implementation.
        :return: A generator yielding ``self[fileid]`` for each id, in order.
        """
        for fileid in fileids:
            yield self[fileid]

    def copy_multi(self, other, ids):
        """Copy texts for ids from other into self.

        If an id is present in self, it is skipped.  A count of copied
        ids is returned, which may be less than len(ids).

        :param other: Another Storage object
        :param ids: A list of entry ids to be copied
        :return: The number of entries copied
        """
        from bzrlib.progress import ProgressBar
        pb = ProgressBar()
        pb.update('preparing to copy')
        to_copy = [fileid for fileid in ids if fileid not in self]
        return self._do_copy(other, to_copy, pb)

    def _do_copy(self, other, to_copy, pb):
        """This is the standard copying mechanism, just get them one at
        a time from remote, and store them locally.

        :param other: Another Storage object
        :param to_copy: A list of entry ids to copy
        :param pb: A ProgressBar object to display completion status.
        :return: The number of entries copied.
        """
        # This should be updated to use add_multi() rather than
        # the current methods of buffering requests.
        # One question, is it faster to queue up 1-10 and then copy 1-10
        # then queue up 11-20, copy 11-20
        # or to queue up 1-10, copy 1, queue 11, copy 2, etc?
        # sort of pipeline versus batch.

        # We can't use self._transport.copy_to because we don't know
        # whether the local tree is in the same format as other
        total = len(to_copy)

        def buffer_requests():
            # Fetch entries from `other` ahead of the consumer, releasing the
            # oldest one whenever the queue exceeds the configured threshold.
            count = 0
            buffered_requests = []
            for fileid in to_copy:
                buffered_requests.append((fileid, other[fileid]))
                if len(buffered_requests) > self._max_buffered_requests:
                    yield buffered_requests.pop(0)
                    count += 1
                    pb.update('copy', count, total)

            # Drain whatever is still queued once every id has been requested.
            for req in buffered_requests:
                yield req
                count += 1
                pb.update('copy', count, total)

            assert count == total

        try:
            self.add_multi(buffer_requests())
        finally:
            # Clear the progress bar even when a copy fails part-way, so the
            # error is not reported on top of a stale progress display.
            pb.clear()
        return total
907.1.1
by John Arbash Meinel
Reworking the Branch and Store code to support an abstracted filesystem layer. |
154 |
|
155 |
class CompressedTextStore(Storage):
    """Store that holds files indexed by unique names.

    Files can be added, but not modified once they are in.  Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Files are stored gzip compressed, with no delta compression.

    >>> st = ScratchCompressedTextStore()

    >>> st.add('aa', StringIO('hello'))
    >>> 'aa' in st
    True
    >>> 'foo' in st
    False

    You are not allowed to add an id that is already present.

    Entries can be retrieved as files, which may then be read.

    >>> st.add('123123', StringIO('goodbye'))
    >>> st['123123'].read()
    'goodbye'

    TODO: Atomic add by writing to a temporary file and renaming.

    In bzr 0.0.5 and earlier, files within the store were marked
    readonly on disk.  This is no longer done but existing stores need
    to be accommodated.
    """

    def __init__(self, basedir):
        """Create a store on top of ``basedir``.

        :param basedir: Passed unchanged to Storage.__init__, which requires
                        a Transport instance despite the parameter name.
        """
        super(CompressedTextStore, self).__init__(basedir)

    def _check_fileid(self, fileid):
        """Raise ValueError if ``fileid`` contains a slash or a backslash."""
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % fileid)

    def _relpath(self, fileid):
        """Return the transport-relative name of ``fileid`` (``<fileid>.gz``).

        :raises ValueError: if ``fileid`` fails _check_fileid().
        """
        self._check_fileid(fileid)
        return fileid + '.gz'

    def _open_compressed(self, f):
        """Wrap a file object returned by the transport in a gzip reader."""
        # gzip.GzipFile.read() requires a tell() function
        # but some transports return objects that cannot seek
        # so buffer them in a StringIO instead
        if hasattr(f, 'tell'):
            return gzip.GzipFile(mode='rb', fileobj=f)
        from cStringIO import StringIO
        return gzip.GzipFile(mode='rb', fileobj=StringIO(f.read()))

    def add(self, fileid, f):
        """Add contents of a file into the store.

        :param fileid: Id under which the contents are stored.
        :param f: An open file, or file-like object.  A plain string is also
                  accepted and is treated as the contents themselves.
        :raises ValueError: if ``fileid`` contains a slash or a backslash.
        :raises BzrError: if the store already contains ``fileid``.
        """
        # TODO: implement an add_multi which can do some of its
        #       own pipelining, and possibly take advantage of
        #       transport.put_multi(). The problem is that
        #       entries potentially need to be compressed as they
        #       are received, which implies translation, which
        #       means it isn't as straightforward as we would like.
        from cStringIO import StringIO
        # BzrError is not imported at module level, so bring it into scope
        # here; without this the duplicate-id check fails with NameError.
        from bzrlib.errors import BzrError
        from bzrlib.osutils import pumpfile

        mutter("add store entry %r" % (fileid))
        if isinstance(f, basestring):
            f = StringIO(f)

        fn = self._relpath(fileid)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

        # The compressed bytes are collected in an in-memory buffer, which is
        # then handed to the transport in a single put().
        sio = StringIO()
        gf = gzip.GzipFile(mode='wb', fileobj=sio)
        # f is always file-like at this point (strings were wrapped above).
        pumpfile(f, gf)
        gf.close()
        sio.seek(0)
        self._transport.put(fn, sio)

    def _do_copy(self, other, to_copy, pb):
        """Copy ``to_copy`` from ``other``, byte-for-byte when possible.

        When ``other`` is also a CompressedTextStore the compressed files are
        copied directly; otherwise the generic Storage._do_copy() is used.

        :return: The number of entries copied.
        """
        if isinstance(other, CompressedTextStore):
            return self._copy_multi_text(other, to_copy, pb)
        return super(CompressedTextStore, self)._do_copy(other, to_copy, pb)

    def _copy_multi_text(self, other, to_copy, pb):
        """Copy the compressed files for ``to_copy`` from ``other`` unchanged.

        :param other: Another CompressedTextStore.
        :param to_copy: A list of entry ids to copy.
        :param pb: Progress bar passed on to the transport and then cleared.
        :return: The count reported by the transport's copy_to().
        """
        # Because of _transport, we can no longer assume
        # that they are on the same filesystem, we can, however
        # assume that we only need to copy the exact bytes,
        # we don't need to process the files.

        paths = [self._relpath(fileid) for fileid in to_copy]
        count = other._transport.copy_to(paths, self._transport, pb=pb)
        assert count == len(to_copy)
        pb.clear()
        return count

    def __contains__(self, fileid):
        """Return whether the transport has the file backing ``fileid``."""
        fn = self._relpath(fileid)
        return self._transport.has(fn)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :param pb: Optional progress bar, passed on to the transport.
        :return: A generator or list returning True/False for each entry.
        """
        relpaths = [self._relpath(fid) for fid in fileids]
        return self._transport.has_multi(relpaths, pb=pb)

    def get(self, fileids, pb=None):
        """Return a set of files, one for each requested entry.

        :param fileids: A list or generator yielding file ids.
        :param pb: Optional progress bar, passed on to the transport.
        :return: A generator yielding a gzip reader for each entry.
        """
        rel_paths = [self._relpath(fid) for fid in fileids]
        for f in self._transport.get_multi(rel_paths, pb=pb):
            yield self._open_compressed(f)

    def __iter__(self):
        """Yield the name of each entry, with any ``.gz`` suffix removed."""
        # TODO: case-insensitive?
        for f in self._transport.list_dir('.'):
            if f.endswith('.gz'):
                yield f[:-3]
            else:
                yield f

    def __len__(self):
        """Return the number of names the transport lists for this store."""
        return len(list(self._transport.list_dir('.')))

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry."""
        fn = self._relpath(fileid)
        f = self._transport.get(fn)
        return self._open_compressed(f)

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content."""
        total = 0
        count = 0
        relpaths = [self._relpath(fid) for fid in self]
        for st in self._transport.stat_multi(relpaths):
            count += 1
            total += st[ST_SIZE]

        return count, total
314 |
||
907.1.2
by John Arbash Meinel
Working on making Branch() do all of it's work over a Transport. |
315 |
class ScratchCompressedTextStore(CompressedTextStore):
    """Self-destructing test subclass of CompressedTextStore.

    The Store only exists for the lifetime of the Python object.
    Obviously you should not put anything precious in it.
    """

    def __init__(self):
        """Create the store in a fresh directory made by tempfile.mkdtemp()."""
        from transport import transport
        super(ScratchCompressedTextStore, self).__init__(transport(tempfile.mkdtemp()))

    def __del__(self):
        """Delete every file in the store, then remove its directory."""
        # If __init__ failed before the transport was assigned, the
        # class-level default of None is still in place and there is
        # nothing to clean up.
        if self._transport is None:
            return
        self._transport.delete_multi(self._transport.list_dir('.'))
        os.rmdir(self._transport.base)
        mutter("%r destroyed" % self)
907.1.2
by John Arbash Meinel
Working on making Branch() do all of it's work over a Transport. |
329 |