~bzr-pqm/bzr/bzr.dev

711 by Martin Pool
- store docs
1
# Copyright (C) 2005 by Canonical Development Ltd
1 by mbp at sourcefrog
import from baz patch-364
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
1374 by Martin Pool
todo
17
# TODO: Could remember a bias towards whether a particular store is typically
18
# compressed or not.
19
711 by Martin Pool
- store docs
20
"""
21
Stores are the main data-storage mechanism for Bazaar-NG.
1 by mbp at sourcefrog
import from baz patch-364
22
23
A store is a simple write-once container indexed by a universally
711 by Martin Pool
- store docs
24
unique ID.
25
"""
1 by mbp at sourcefrog
import from baz patch-364
26
1442.1.51 by Robert Collins
teach iter about suffixes
27
import os
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
28
from cStringIO import StringIO
1429 by Robert Collins
merge in niemeyers prefixed-store patch
29
from zlib import adler32
1185.1.41 by Robert Collins
massive patch from Alexander Belchenko - many PEP8 fixes, removes unused function uuid
30
1442.1.44 by Robert Collins
Many transport related tweaks:
31
import bzrlib
1433 by Robert Collins
merge in and make incremental Gustavo Niemeyers nested log patch, and remove all bare exceptions in store and transport packages.
32
import bzrlib.errors as errors
1393.2.3 by John Arbash Meinel
Fixing typos, updating stores, getting tests to pass.
33
from bzrlib.errors import BzrError, UnlistableStore, TransportNotPossible
1104 by Martin Pool
- Add a simple UIFactory
34
from bzrlib.trace import mutter
1442.1.44 by Robert Collins
Many transport related tweaks:
35
import bzrlib.transport as transport
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
36
from bzrlib.transport.local import LocalTransport
1 by mbp at sourcefrog
import from baz patch-364
37
38
######################################################################
39
# stores
40
41
class StoreError(Exception):
    """Exception type declared by the store module.

    It adds nothing to ``Exception``; it only provides a distinct class
    that store code can raise and callers can catch.
    """
43
44
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
45
class Store(object):
    """This class represents the abstract storage layout for saving information.

    Files can be added, but not modified once they are in.  Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Most methods here raise NotImplementedError and are meant to be
    overridden; copy_multi() and _copy_one() are implemented in terms of
    has_id(), get() and add().
    """

    def __len__(self):
        raise NotImplementedError('Children should define their length')

    def get(self, file_id, suffix=None):
        """Returns a file reading from a particular entry.

        If suffix is present, retrieve the named suffix for file_id.
        """
        raise NotImplementedError

    def __getitem__(self, fileid):
        """DEPRECATED. Please use .get(file_id) instead."""
        raise NotImplementedError

    #def __contains__(self, fileid):
    #    """Deprecated, please use has_id"""
    #    raise NotImplementedError

    def __iter__(self):
        raise NotImplementedError

    def add(self, f, fileid):
        """Add a file object f to the store accessible from the given fileid"""
        raise NotImplementedError('Children of Store must define their method of adding entries.')

    def has_id(self, file_id, suffix=None):
        """Return True or false for the presence of file_id in the store.

        suffix, if present, is a per file suffix, i.e. for digital signature
        data.
        """
        raise NotImplementedError

    def listable(self):
        """Return True if this store is able to be listed."""
        # NOTE(review): this class defines __iter__ itself, so the check
        # below is true for every Store; subclasses that cannot be listed
        # need to override listable().
        return hasattr(self, "__iter__")

    def copy_multi(self, other, ids, pb=None, permit_failure=False):
        """Copy texts for ids from other into self.

        If an id is already present in self, it is skipped.

        :param other: Another Store object
        :param ids: An iterable of entry ids to be copied
        :param pb: A ProgressBar object, if none is given, the default will
            be created.
        :param permit_failure: Allow missing entries to be ignored
        :return: (count, failed).  count is the number of ids examined
            (including ids that were skipped or that failed), so on normal
            completion it equals the number of ids given.  failed is the set
            of ids for which the copy raised KeyError while permit_failure
            was true.
        :raises KeyError: if an entry cannot be copied and permit_failure
            is false.
        """
        if pb is None:
            # Import the submodule explicitly: a bare 'import bzrlib' does
            # not guarantee that bzrlib.ui has been loaded.
            from bzrlib import ui
            pb = ui.ui_factory.progress_bar()
        pb.update('preparing to copy')
        failed = set()
        ids = list(ids)  # materialise so a total can be shown
        total = len(ids)
        count = 0
        try:
            for fileid in ids:
                count += 1
                if self.has_id(fileid):
                    continue
                try:
                    self._copy_one(fileid, other, pb)
                except KeyError:
                    if not permit_failure:
                        raise
                    failed.add(fileid)
                else:
                    pb.update('copy', count, total)
        finally:
            # Clear the progress bar even when a copy fails part-way.
            pb.clear()
        return count, failed

    def _copy_one(self, fileid, other, pb):
        """Most generic copy-one object routine.

        Subclasses can override this to provide an optimised
        copy between their own instances. Such overridden routines
        should call this if they have no optimised facility for a
        specific 'other'.
        """
        f = other.get(fileid)
        self.add(f, fileid)
1185.10.1 by Aaron Bentley
Added --basis option to bzr branch
135
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
136
137
class TransportStore(Store):
    """A TransportStore is a Store superclass for Stores that use Transports.

    Entries are files reached through self._transport.  Subclasses provide
    _get() and _add(), which do the actual reading and writing of a
    relative path; this class maps file ids (and optional registered
    suffixes) onto those relative paths.
    """

    def __init__(self, a_transport, prefixed=False):
        """Create a store on a_transport.

        :param a_transport: The Transport the entries live on.
        :param prefixed: If true, entries are placed in a subdirectory
            named by hash_prefix() of their file id.
        """
        assert isinstance(a_transport, transport.Transport)
        super(TransportStore, self).__init__()
        self._transport = a_transport
        self._prefixed = prefixed
        self._suffixes = set()

    def _suffixed_relpath(self, fileid, suffix):
        """Return the relpath for fileid, with suffix appended unless None."""
        if suffix is None:
            return self._relpath(fileid)
        return self._relpath(fileid, [suffix])

    def add(self, f, fileid, suffix=None):
        """Add contents of a file into the store.

        f -- A file-like object, or string
        fileid -- The id to store f under.
        suffix -- Optional registered suffix; it does not alter the fileid.

        Raises BzrError if the target path already exists on the transport.
        """
        # Format with a 1-tuple so that an invalid (e.g. tuple) fileid is
        # logged as-is and then rejected by _check_fileid, rather than
        # breaking the format operation.
        mutter("add store entry %r" % (fileid,))

        fn = self._suffixed_relpath(fileid, suffix)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

        if self._prefixed:
            try:
                # hash_prefix() ends with '/'; strip it to get the directory.
                self._transport.mkdir(hash_prefix(fileid)[:-1])
            except errors.FileExists:
                pass

        self._add(fn, f)

    def _check_fileid(self, fileid):
        """Raise TypeError/ValueError unless fileid is a separator-free string."""
        if not isinstance(fileid, basestring):
            raise TypeError('Fileids should be a string type: %s %r' % (type(fileid), fileid))
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % (fileid,))

    def has_id(self, fileid, suffix=None):
        """See Store.has_id."""
        return self._transport.has(self._suffixed_relpath(fileid, suffix))

    def _get(self, filename):
        """Return an vanilla file stream for clients to read from.

        This is the body of a template method on 'get', and should be
        implemented by subclasses.
        """
        raise NotImplementedError

    def get(self, fileid, suffix=None):
        """See Store.get().

        Raises KeyError(fileid) when the transport reports NoSuchFile.
        """
        fn = self._suffixed_relpath(fileid, suffix)
        try:
            return self._get(fn)
        except errors.NoSuchFile:
            raise KeyError(fileid)

    def __iter__(self):
        """Yield the name of every primary entry found on the transport.

        A trailing '.gz' is removed from each file name, and files whose
        name then ends in a registered suffix are not reported.
        """
        endings = ['.' + suffix for suffix in self._suffixes]
        for relpath in self._transport.iter_files_recursive():
            name = os.path.basename(relpath)
            if name.endswith('.gz'):
                name = name[:-3]
            # One pass over the registered suffixes is enough to decide
            # whether this file is a suffixed companion of another entry.
            for ending in endings:
                if name.endswith(ending):
                    break
            else:
                yield name

    def __len__(self):
        """Return the number of names produced by iterating the store."""
        return sum(1 for _ in self)

    def _relpath(self, fileid, suffixes=()):
        """Return the transport-relative path for fileid plus suffixes.

        :param fileid: The entry id; validated with _check_fileid.
        :param suffixes: Sequence of suffixes to append, each of which must
            have been registered with register_suffix().
        :raises ValueError: for an unregistered or invalid suffix, or an
            invalid fileid.
        """
        self._check_fileid(fileid)
        for suffix in suffixes:
            if suffix not in self._suffixes:
                raise ValueError("Unregistered suffix %r" % (suffix,))
            self._check_fileid(suffix)
        if self._prefixed:
            path = [hash_prefix(fileid) + fileid]
        else:
            path = [fileid]
        path.extend(suffixes)
        return '.'.join(path)

    def __repr__(self):
        if self._transport is None:
            return "%s(None)" % (self.__class__.__name__)
        else:
            return "%s(%r)" % (self.__class__.__name__, self._transport.base)

    __str__ = __repr__

    def listable(self):
        """Return True if this store is able to be listed."""
        return self._transport.listable()

    def register_suffix(self, suffix):
        """Register a suffix as being expected in this store."""
        self._check_fileid(suffix)
        self._suffixes.add(suffix)

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content.  Every file reported by the transport is counted.
        """
        total = 0
        count = 0
        for relpath in self._transport.iter_files_recursive():
            count += 1
            total += self._transport.stat(relpath).st_size

        return count, total
261
1092.2.1 by Robert Collins
minor refactors to store, create an ImmutableMemoryStore for testing or other such operations
262
1442.1.44 by Robert Collins
Many transport related tweaks:
263
def ImmutableMemoryStore():
    """Return a new TextStore backed by a fresh MemoryTransport."""
    # Import the submodules here: attribute access such as
    # bzrlib.store.text only works if something else has already imported
    # them, and importing at call time avoids loading them with this module.
    from bzrlib.store.text import TextStore
    from bzrlib.transport.memory import MemoryTransport
    return TextStore(MemoryTransport())
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
265
        
266
267
class CachedStore(Store):
    """A store that caches data locally, to avoid repeated downloads.

    get() copies an entry from the source store into a local cache store
    the first time it is requested and always returns the cached copy.

    NOTE(review): earlier documentation referred to a pre-caching method
    for avoiding a server round-trip per entry; no such method is defined
    in this class.
    """

    def __init__(self, store, cache_dir):
        """Wrap store, keeping cached copies of its entries under cache_dir."""
        super(CachedStore, self).__init__()
        self.source_store = store
        # This clones the source store type with a locally bound
        # transport. FIXME: it assumes a constructor is == cloning.
        # clonable store - it might be nicer to actually have a clone()
        # or something. RBC 20051003
        self.cache_store = store.__class__(LocalTransport(cache_dir))

    def get(self, id):
        """Return the cached entry for id, fetching it from the source first
        if the cache does not hold it yet."""
        mutter("Cache add %s" % id)
        # Use has_id() rather than 'in': the stores do not define
        # __contains__, so 'in' would fall back to iterating the whole cache.
        if not self.cache_store.has_id(id):
            self.cache_store.add(self.source_store.get(id), id)
        return self.cache_store.get(id)

    def has_id(self, fileid, suffix=None):
        """See Store.has_id."""
        if self.cache_store.has_id(fileid, suffix):
            return True
        if self.source_store.has_id(fileid, suffix):
            # We could copy at this time
            return True
        return False
296
1092.2.19 by Robert Collins
update with integration
297
1185.10.1 by Aaron Bentley
Added --basis option to bzr branch
298
def copy_all(store_from, store_to):
    """Copy all ids from one store to another.

    :param store_from: The store to read from; it must be listable.
    :param store_to: The store whose copy_multi() performs the copy.
    :return: Whatever store_to.copy_multi() returns.
    :raises UnlistableStore: if store_from.listable() is false.
    """
    # TODO: Optional progress indicator
    if not store_from.listable():
        raise UnlistableStore(store_from)
    ids = list(store_from)
    return store_to.copy_multi(store_from, ids)
1393.2.1 by John Arbash Meinel
Merged in split-storage-2 branch. Need to cleanup a little bit more still.
305
1429 by Robert Collins
merge in niemeyers prefixed-store patch
306
def hash_prefix(file_id):
    """Return the directory prefix used for file_id in a prefixed store.

    The prefix is the low 8 bits of the Adler-32 checksum of file_id,
    written as two lowercase hex digits followed by '/'.
    """
    try:
        checksum = adler32(file_id)
    except (TypeError, UnicodeError):
        # Text ids that cannot be checksummed directly are hashed via
        # their UTF-8 encoding; byte-string ids are unaffected.
        checksum = adler32(file_id.encode('utf-8'))
    return "%02x/" % (checksum & 0xff)
308