~bzr-pqm/bzr/bzr.dev

711 by Martin Pool
- store docs
1
# Copyright (C) 2005 by Canonical Development Ltd
1 by mbp at sourcefrog
import from baz patch-364
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
711 by Martin Pool
- store docs
17
"""
18
Stores are the main data-storage mechanism for Bazaar-NG.
1 by mbp at sourcefrog
import from baz patch-364
19
20
A store is a simple write-once container indexed by a universally
711 by Martin Pool
- store docs
21
unique ID.
22
"""
1 by mbp at sourcefrog
import from baz patch-364
23
907.1.1 by John Arbash Meinel
Reworking the Branch and Store code to support an abstracted filesystem layer.
24
import os, tempfile, osutils, gzip, errno
81 by mbp at sourcefrog
show space usage for various stores in the info command
25
from stat import ST_SIZE
1 by mbp at sourcefrog
import from baz patch-364
26
from StringIO import StringIO
27
from trace import mutter
28
29
######################################################################
30
# stores
31
32
class StoreError(Exception):
    """Base class for errors raised by store operations."""
34
907.1.1 by John Arbash Meinel
Reworking the Branch and Store code to support an abstracted filesystem layer.
35
class Storage(object):
    """Abstract storage layout for saving information through a transport.

    Concrete stores implement ``__len__``, ``__getitem__``,
    ``__contains__``, ``__iter__`` and ``add``.  The bulk helpers defined
    here (``add_multi``, ``has``, ``get``, ``copy_multi``) are written in
    terms of those primitives.
    """

    # Transport the store reads from and writes to; assigned by __init__.
    _transport = None
    # Upper bound on the number of fetched-but-not-yet-stored entries that
    # _do_copy() keeps queued.
    _max_buffered_requests = 10

    def __init__(self, transport):
        """Create a store on top of the given transport.

        :param transport: A Transport instance.
        """
        from transport import Transport
        assert isinstance(transport, Transport)
        self._transport = transport

    def __repr__(self):
        if self._transport is None:
            return "%s(None)" % (self.__class__.__name__)
        else:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)

    __str__ = __repr__

    def __len__(self):
        raise NotImplementedError('Children should define their length')

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry."""
        raise NotImplementedError

    def __contains__(self, fileid):
        """Return True if an entry with the given fileid is in the store."""
        raise NotImplementedError

    def __iter__(self):
        raise NotImplementedError

    def add(self, fileid, f):
        """Add a file object f to the store accessible from the given fileid"""
        raise NotImplementedError('Children of Storage must define their method of adding entries.')

    def add_multi(self, entries):
        """Add a series of file-like or string objects to the store with the given
        identities.

        :param entries: A list of tuples of id,file pairs [(id1, file1), (id2, file2), ...]
                        This could also be a generator yielding (id,file) pairs.
        """
        for fileid, f in entries:
            self.add(fileid, f)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :param pb: Accepted so the signature matches subclasses that take a
                   progress bar; it is not used here.
        :return: A generator or list returning True/False for each entry.
        """
        for fileid in fileids:
            if fileid in self:
                yield True
            else:
                yield False

    def get(self, fileids, pb=None):
        """Yield one file for each requested entry, in request order.

        :param fileids: A list or generator yielding file ids.
        :param pb: Not used by this implementation.
        """
        for fileid in fileids:
            yield self[fileid]

    def copy_multi(self, other, ids):
        """Copy texts for ids from other into self.

        If an id is present in self, it is skipped.  A count of copied
        ids is returned, which may be less than len(ids).

        :param other: Another Storage object
        :param ids: A list of entry ids to be copied
        :return: The number of entries copied
        """
        from bzrlib.progress import ProgressBar
        pb = ProgressBar()
        pb.update('preparing to copy')
        to_copy = [fileid for fileid in ids if fileid not in self]
        return self._do_copy(other, to_copy, pb)

    def _do_copy(self, other, to_copy, pb):
        """This is the standard copying mechanism, just get them one at
        a time from remote, and store them locally.

        :param other: Another Storage object
        :param to_copy: A list of entry ids to copy
        :param pb: A ProgressBar object to display completion status.
        :return: The number of entries copied.
        """
        # This should be updated to use add_multi() rather than
        # the current methods of buffering requests.
        # One question, is it faster to queue up 1-10 and then copy 1-10
        # then queue up 11-20, copy 11-20
        # or to queue up 1-10, copy 1, queue 11, copy 2, etc?
        # sort of pipeline versus batch.

        # We can't use self._transport.copy_to because we don't know
        # whether the local tree is in the same format as other
        from collections import deque

        total = len(to_copy)

        def buffer_requests():
            count = 0
            pending = deque()
            for fileid in to_copy:
                pending.append((fileid, other[fileid]))
                # '>=' keeps the queue at no more than
                # _max_buffered_requests entries.
                if len(pending) >= self._max_buffered_requests:
                    yield pending.popleft()
                    count += 1
                    pb.update('copy', count, total)

            # Everything has been requested; drain what is still queued.
            while pending:
                yield pending.popleft()
                count += 1
                pb.update('copy', count, total)

            assert count == total

        try:
            self.add_multi(buffer_requests())
        finally:
            # Clear the progress bar even when an add fails part-way.
            pb.clear()
        return total
907.1.1 by John Arbash Meinel
Reworking the Branch and Store code to support an abstracted filesystem layer.
154
155
class CompressedTextStore(Storage):
    """Store that holds files indexed by unique names.

    Files can be added, but not modified once they are in.  Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Files are stored gzip compressed, with no delta compression.

    >>> st = ScratchCompressedTextStore()

    >>> st.add('aa', StringIO('hello'))
    >>> 'aa' in st
    True
    >>> 'foo' in st
    False

    You are not allowed to add an id that is already present.

    Entries can be retrieved as files, which may then be read.

    >>> st.add('123123', StringIO('goodbye'))
    >>> st['123123'].read()
    'goodbye'

    TODO: Atomic add by writing to a temporary file and renaming.

    In bzr 0.0.5 and earlier, files within the store were marked
    readonly on disk.  This is no longer done but existing stores need
    to be accommodated.
    """

    def __init__(self, basedir):
        """Create the store.

        :param basedir: Passed unchanged to Storage.__init__ as the
                        transport for this store.
        """
        super(CompressedTextStore, self).__init__(basedir)

    def _check_fileid(self, fileid):
        """Raise ValueError if fileid contains a path separator."""
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % fileid)

    def _relpath(self, fileid):
        """Return the transport-relative name under which fileid is stored."""
        self._check_fileid(fileid)
        return fileid + '.gz'

    def _open_compressed(self, f):
        """Wrap a file returned by the transport in a decompressing reader."""
        # gzip.GzipFile.read() requires a tell() function
        # but some transports return objects that cannot seek
        # so buffer them in a StringIO instead
        if not hasattr(f, 'tell'):
            from cStringIO import StringIO
            f = StringIO(f.read())
        return gzip.GzipFile(mode='rb', fileobj=f)

    def add(self, fileid, f):
        """Add contents of a file into the store.

        :param fileid: Id to store the contents under; must not already
                       be present in the store.
        :param f: An open file, or file-like object, or a string holding
                  the contents.
        :raises BzrError: if the store already contains fileid.
        :raises ValueError: if fileid contains a path separator.
        """
        # TODO: implement an add_multi which can do some of it's
        #       own piplelining, and possible take advantage of
        #       transport.put_multi(). The problem is that
        #       entries potentially need to be compressed as they
        #       are received, which implies translation, which
        #       means it isn't as straightforward as we would like.
        from cStringIO import StringIO
        # BzrError is not imported at module level, so bring it into
        # scope here.
        from bzrlib.errors import BzrError
        from bzrlib.osutils import pumpfile

        mutter("add store entry %r" % (fileid))
        if isinstance(f, basestring):
            f = StringIO(f)

        fn = self._relpath(fileid)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

        # The compressed bytes are collected in an in-memory buffer and
        # handed to the transport in one put().
        sio = StringIO()
        gf = gzip.GzipFile(mode='wb', fileobj=sio)
        pumpfile(f, gf)
        gf.close()
        sio.seek(0)
        self._transport.put(fn, sio)

    def _do_copy(self, other, to_copy, pb):
        """Copy entries from other, byte-for-byte when it is also compressed.

        :param other: Another Storage object
        :param to_copy: A list of entry ids to copy
        :param pb: A ProgressBar object to display completion status.
        :return: The number of entries copied.
        """
        if isinstance(other, CompressedTextStore):
            return self._copy_multi_text(other, to_copy, pb)
        return super(CompressedTextStore, self)._do_copy(other, to_copy, pb)

    def _copy_multi_text(self, other, to_copy, pb):
        """Copy already-compressed entries from another CompressedTextStore.

        :return: The count reported by the transport's copy_to().
        """
        # Because of _transport, we can no longer assume
        # that they are on the same filesystem, we can, however
        # assume that we only need to copy the exact bytes,
        # we don't need to process the files.

        paths = [self._relpath(fileid) for fileid in to_copy]
        try:
            count = other._transport.copy_to(paths, self._transport, pb=pb)
        finally:
            # Clear the progress bar even when the copy fails.
            pb.clear()
        assert count == len(to_copy)
        return count

    def __contains__(self, fileid):
        """Return whether the transport has the file backing fileid."""
        fn = self._relpath(fileid)
        return self._transport.has(fn)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :param pb: Passed through to the transport's has_multi().
        :return: A generator or list returning True/False for each entry.
        """
        relpaths = [self._relpath(fid) for fid in fileids]
        return self._transport.has_multi(relpaths, pb=pb)

    def get(self, fileids, pb=None):
        """Yield a decompressing file for each requested entry.

        :param fileids: A list or generator yielding file ids.
        :param pb: Passed through to the transport's get_multi().
        """
        rel_paths = [self._relpath(fid) for fid in fileids]
        for f in self._transport.get_multi(rel_paths, pb=pb):
            yield self._open_compressed(f)

    def __iter__(self):
        """Yield the name of every entry in the transport's listing.

        A trailing '.gz' is stripped; other names are yielded unchanged.
        """
        # TODO: case-insensitive?
        for f in self._transport.list_dir('.'):
            if f.endswith('.gz'):
                yield f[:-3]
            else:
                yield f

    def __len__(self):
        return len(list(self._transport.list_dir('.')))

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry."""
        fn = self._relpath(fileid)
        return self._open_compressed(self._transport.get(fn))

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content."""
        total = 0
        count = 0
        # NOTE(review): __iter__ yields names without a '.gz' suffix
        # unchanged, and _relpath() then appends '.gz' to them, so such an
        # entry would be statted under a different name -- confirm whether
        # suffix-less files can still occur in a store.
        relpaths = [self._relpath(fid) for fid in self]
        for st in self._transport.stat_multi(relpaths):
            count += 1
            total += st[ST_SIZE]

        return count, total
314
907.1.2 by John Arbash Meinel
Working on making Branch() do all of it's work over a Transport.
315
class ScratchCompressedTextStore(CompressedTextStore):
    """Self-destructing test subclass of CompressedTextStore.

    The Store only exists for the lifetime of the Python object.
    Obviously you should not put anything precious in it.
    """
    def __init__(self):
        """Create the store in a fresh directory made by tempfile.mkdtemp()."""
        from transport import transport
        super(ScratchCompressedTextStore, self).__init__(transport(tempfile.mkdtemp()))

    def __del__(self):
        """Delete every listed file, then remove the store directory."""
        # _transport keeps its class-level default of None when __init__
        # did not complete; there is then nothing to clean up, and
        # dereferencing it would raise during finalization.
        if self._transport is None:
            return
        self._transport.delete_multi(self._transport.list_dir('.'))
        os.rmdir(self._transport.base)
        mutter("%r destroyed" % self)
907.1.2 by John Arbash Meinel
Working on making Branch() do all of it's work over a Transport.
329