~bzr-pqm/bzr/bzr.dev

711 by Martin Pool
- store docs
1
# Copyright (C) 2005 by Canonical Development Ltd
1 by mbp at sourcefrog
import from baz patch-364
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1374 by Martin Pool
todo
17
# TODO: Could remember a bias towards whether a particular store is typically
18
# compressed or not.
19
711 by Martin Pool
- store docs
20
"""
21
Stores are the main data-storage mechanism for Bazaar-NG.
1 by mbp at sourcefrog
import from baz patch-364
22
23
A store is a simple write-once container indexed by a universally
711 by Martin Pool
- store docs
24
unique ID.
25
"""
1 by mbp at sourcefrog
import from baz patch-364
26
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
27
from cStringIO import StringIO
1429 by Robert Collins
merge in niemeyers prefixed-store patch
28
from zlib import adler32
1185.1.41 by Robert Collins
massive patch from Alexander Belchenko - many PEP8 fixes, removes unused function uuid
29
1442.1.44 by Robert Collins
Many transport related tweaks:
30
import bzrlib
1433 by Robert Collins
merge in and make incremental Gustavo Niemeyers nested log patch, and remove all bare exceptions in store and transport packages.
31
import bzrlib.errors as errors
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
32
from bzrlib.errors import BzrError, UnlistableStore, TransportNotPossible
1104 by Martin Pool
- Add a simple UIFactory
33
from bzrlib.trace import mutter
1442.1.44 by Robert Collins
Many transport related tweaks:
34
import bzrlib.transport as transport
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
35
from bzrlib.transport.local import LocalTransport
1 by mbp at sourcefrog
import from baz patch-364
36
37
######################################################################
38
# stores
39
40
class StoreError(Exception):
    """Exception type declared by this module for store-related errors."""
42
43
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
44
class Store(object):
    """This class represents the abstract storage layout for saving information.

    Files can be added, but not modified once they are in.  Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Subclasses are expected to provide __len__, __iter__, get, add and
    has_id; the versions here only raise NotImplementedError.
    """

    # Maximum number of fetched-but-not-yet-added entries that _do_copy
    # keeps queued at once.  Subclasses may override this.
    _max_buffered_requests = 10

    def __len__(self):
        raise NotImplementedError('Children should define their length')

    def get(self, file_id):
        """Returns a file reading from a particular entry.

        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    def __getitem__(self, fileid):
        """DEPRECATED. Please use .get(file_id) instead."""
        raise NotImplementedError

    # __contains__ is intentionally not defined: callers should use
    # has_id() instead.

    def __iter__(self):
        raise NotImplementedError

    def add(self, f, fileid):
        """Add a file object f to the store accessible from the given fileid"""
        raise NotImplementedError('Children of Store must define their method of adding entries.')

    def add_multi(self, entries):
        """Add a series of file-like or string objects to the store with the given
        identities.

        :param entries: A list of tuples of file,id pairs [(file1, id1), (file2, id2), ...]
                        This could also be a generator yielding (file,id) pairs.
        """
        for f, fileid in entries:
            self.add(f, fileid)

    def has_id(self, file_id):
        """Return True or false for the presence of file_id in the store."""
        raise NotImplementedError

    def listable(self):
        """Return True if this store is able to be listed."""
        # NOTE(review): Store itself defines __iter__ (raising
        # NotImplementedError), so this test is true for every subclass
        # that does not override listable().
        return hasattr(self, "__iter__")

    def copy_multi(self, other, ids, pb=None, permit_failure=False):
        """Copy texts for ids from other into self.

        If an id is present in self, it is skipped.  A count of copied
        ids is returned, which may be less than len(ids).

        :param other: Another Store object
        :param ids: A list of entry ids to be copied
        :param pb: A ProgressBar object, if none is given, the default will be created.
        :param permit_failure: Allow missing entries to be ignored
        :return: (n_copied, [failed]) The number of entries copied successfully,
            followed by a list of entries which could not be copied (because they
            were missing)
        """
        if pb is None:
            pb = bzrlib.ui.ui_factory.progress_bar()

        # XXX: Is there any reason why we couldn't make this accept a generator
        # and build a list as it finds things to copy?
        ids = list(ids) # Make sure we don't have a generator, since we iterate 2 times
        pb.update('preparing to copy')
        to_copy = [file_id for file_id in ids if not self.has_id(file_id)]
        return self._do_copy(other, to_copy, pb, permit_failure=permit_failure)

    def _do_copy(self, other, to_copy, pb, permit_failure=False):
        """This is the standard copying mechanism, just get them one at
        a time from remote, and store them locally.

        :param other: Another Store object
        :param to_copy: A list of entry ids to copy
        :param pb: A ProgressBar object to display completion status.
        :param permit_failure: Allow missing entries to be ignored
        :return: (n_copied, [failed])
            The number of entries copied, and a list of failed entries.
        :raises KeyError: if other.get() raises it for an entry and
            permit_failure is false.
        """
        # This should be updated to use add_multi() rather than
        # the current methods of buffering requests.
        # One question, is it faster to queue up 1-10 and then copy 1-10
        # then queue up 11-20, copy 11-20
        # or to queue up 1-10, copy 1, queue 11, copy 2, etc?
        # sort of pipeline versus batch.

        # We can't use self._transport.copy_to because we don't know
        # whether the local tree is in the same format as other
        failed = set()
        total = len(to_copy)
        # One-element list so the nested generator can update the counter
        # (Python 2 closures cannot rebind names of the enclosing scope).
        copied = [0]

        def buffer_requests():
            pending = []
            for fileid in to_copy:
                try:
                    f = other.get(fileid)
                except KeyError:
                    if not permit_failure:
                        raise
                    # Missing entries are recorded and skipped; they are
                    # never yielded, so they never count as copied.
                    failed.add(fileid)
                    continue

                pending.append((f, fileid))
                if len(pending) > self._max_buffered_requests:
                    yield pending.pop(0)
                    # Execution resumes here only once the consumer asks
                    # for the next entry, i.e. after the previous one has
                    # been handed to add().
                    copied[0] += 1
                    pb.update('copy', copied[0], total)

            # Drain whatever is still queued.
            for req in pending:
                yield req
                copied[0] += 1
                pb.update('copy', copied[0], total)

        self.add_multi(buffer_requests())

        pb.clear()
        # Report what was really copied: entries skipped because of
        # permit_failure are listed in 'failed', not counted here.
        return copied[0], failed
1185.10.1 by Aaron Bentley
Added --basis option to bzr branch
169
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
170
171
class TransportStore(Store):
    """A TransportStore is a Store superclass for Stores that use Transports.

    Entries are addressed by a relative path built from the file id, an
    optional hash-prefix directory (when the store is 'prefixed') and any
    registered suffixes.  Subclasses supply _add() and _get(), which do the
    actual writing and reading; __len__ additionally relies on an
    _iter_relpath() method that is not defined in this class.
    """

    _max_buffered_requests = 10

    def __init__(self, a_transport, prefixed=False):
        """Create a store on top of a_transport.

        :param a_transport: The Transport used for all reads and writes.
        :param prefixed: If true, entries live in a sub-directory named by
            hash_prefix(fileid).
        """
        assert isinstance(a_transport, transport.Transport)
        super(TransportStore, self).__init__()
        self._transport = a_transport
        self._prefixed = prefixed
        # Suffixes that have been declared through register_suffix().
        self._suffixes = set()

    def add(self, f, fileid, suffix=None):
        """Add contents of a file into the store.

        f -- A file-like object, or string
        fileid -- The id the contents are stored under.
        suffix -- Optional registered suffix appended to the file name; it
            does not alter the fileid.

        Raises BzrError if the computed path already exists on the transport.
        """
        # Wrap fileid in a tuple so a tuple-valued (invalid) id reaches the
        # type check in _check_fileid instead of breaking the formatting.
        mutter("add store entry %r" % (fileid,))

        if suffix is not None:
            fn = self._relpath(fileid, [suffix])
        else:
            fn = self._relpath(fileid)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

        if self._prefixed:
            # The prefix directory may already exist from an earlier add.
            try:
                self._transport.mkdir(hash_prefix(fileid))
            except errors.FileExists:
                pass

        self._add(fn, f)

    def _check_fileid(self, fileid):
        """Raise TypeError/ValueError unless fileid is a string without path separators."""
        if not isinstance(fileid, basestring):
            raise TypeError('Fileids should be a string type: %s %r' % (type(fileid), fileid))
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % fileid)

    def has_id(self, fileid):
        """Return whether the transport has the (suffix-less) path for fileid."""
        fn = self._relpath(fileid)
        return self._transport.has(fn)

    def _get(self, filename):
        """Return an vanilla file stream for clients to read from.

        This is the body of a template method on 'get', and should be
        implemented by subclasses.
        """
        raise NotImplementedError

    def get(self, fileid):
        """Returns a file reading from a particular entry.

        Raises KeyError(fileid) when _get reports errors.NoSuchFile.
        """
        fn = self._relpath(fileid)
        try:
            return self._get(fn)
        except errors.NoSuchFile:
            raise KeyError(fileid)

    def __len__(self):
        return len(list(self._iter_relpath()))

    def _relpath(self, fileid, suffixes=()):
        """Return the transport-relative path for fileid plus suffixes.

        :param fileid: The entry id; validated with _check_fileid.
        :param suffixes: Sequence of suffixes, each of which must have been
            registered with register_suffix().
        :raises ValueError: for an unregistered suffix or an invalid id.
        """
        self._check_fileid(fileid)
        for suffix in suffixes:
            if suffix not in self._suffixes:
                raise ValueError("Unregistered suffix %r" % (suffix,))
            self._check_fileid(suffix)
        if self._prefixed:
            path = [hash_prefix(fileid) + fileid]
        else:
            path = [fileid]
        path.extend(suffixes)
        return '.'.join(path)

    def __repr__(self):
        if self._transport is None:
            return "%s(None)" % (self.__class__.__name__)
        else:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)

    __str__ = __repr__

    def listable(self):
        """Return True if this store is able to be listed."""
        return self._transport.listable()

    def register_suffix(self, suffix):
        """Register a suffix as being expected in this store."""
        self._check_fileid(suffix)
        self._suffixes.add(suffix)

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content."""
        total = 0
        count = 0
        for relpath in self._transport.iter_files_recursive():
            count += 1
            total += self._transport.stat(relpath).st_size

        return count, total
276
1092.2.1 by Robert Collins
minor refactors to store, create an ImmutableMemoryStore for testing or other such operations
277
1442.1.44 by Robert Collins
Many transport related tweaks:
278
def ImmutableMemoryStore():
    """Return a new TextStore whose transport is a fresh MemoryTransport."""
    # NOTE(review): relies on bzrlib.store.text and bzrlib.transport.memory
    # already being importable as attributes here -- confirm they are loaded.
    store_class = bzrlib.store.text.TextStore
    return store_class(transport.memory.MemoryTransport())
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
280
        
281
282
class CachedStore(Store):
    """A store that caches data locally, to avoid repeated downloads.
    The prefetch method should be used to avoid server round-trips for
    every piece of data.
    """

    def __init__(self, store, cache_dir):
        """Wrap store, keeping local copies of its entries under cache_dir.

        :param store: The source store that entries are fetched from.
        :param cache_dir: Directory handed to LocalTransport for the cache.
        """
        super(CachedStore, self).__init__()
        self.source_store = store
        # This clones the source store type with a locally bound
        # transport. FIXME: it assumes a constructor is == cloning.
        # clonable store - it might be nicer to actually have a clone()
        # or something. RBC 20051003
        self.cache_store = store.__class__(LocalTransport(cache_dir))

    def get(self, id):
        """Return the cached entry for id, fetching it from the source first if needed."""
        mutter("Cache add %s" % id)
        # Use has_id(): stores no longer define __contains__, so 'in' would
        # fall back to iterating over the whole store.
        if not self.cache_store.has_id(id):
            self.cache_store.add(self.source_store.get(id), id)
        return self.cache_store.get(id)

    def has_id(self, fileid):
        """Return True if fileid is present in the cache or in the source store."""
        if self.cache_store.has_id(fileid):
            return True
        if self.source_store.has_id(fileid):
            # We could copy at this time
            return True
        return False

    def prefetch(self, ids):
        """Copy a series of ids into the cache, before they are used.
        For remote stores that support pipelining or async downloads, this can
        increase speed considerably.

        Failures while prefetching are ignored.
        """
        # Materialise first: joining a generator for the log line would
        # exhaust it and leave nothing for copy_multi to copy.
        ids = list(ids)
        mutter("Prefetch of ids %s" % ",".join(ids))
        self.cache_store.copy_multi(self.source_store, ids,
                                    permit_failure=True)
1092.2.19 by Robert Collins
update with integration
321
322
1185.10.1 by Aaron Bentley
Added --basis option to bzr branch
323
def copy_all(store_from, store_to):
    """Copy every id found by iterating store_from into store_to.

    Raises UnlistableStore if store_from reports that it cannot be listed.
    """
    # TODO: Optional progress indicator
    if store_from.listable():
        all_ids = list(store_from)
        store_to.copy_multi(store_from, all_ids)
    else:
        raise UnlistableStore(store_from)
1393.2.1 by John Arbash Meinel
Merged in split-storage-2 branch. Need to cleanup a little bit more still.
330
1429 by Robert Collins
merge in niemeyers prefixed-store patch
331
def hash_prefix(file_id):
    """Return a two-hex-digit directory prefix (with trailing '/') for file_id.

    The prefix is the low byte of the adler32 checksum of file_id.
    """
    low_byte = adler32(file_id) & 0xff
    return "%02x/" % low_byte
333