~bzr-pqm/bzr/bzr.dev

1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
1
# Copyright (C) 2005 by Canonical Development Ltd
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""
18
An implementation of the primary storage type CompressedTextStore.
19
20
This store keeps compressed versions of the full text. It does not
21
do any sort of delta compression.
22
"""
23
24
import os, tempfile, gzip
25
26
import bzrlib.store
1429 by Robert Collins
merge in niemeyers prefixed-store patch
27
from bzrlib.store import hash_prefix
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
28
from bzrlib.trace import mutter
1429 by Robert Collins
merge in niemeyers prefixed-store patch
29
from bzrlib.errors import BzrError, FileExists
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
30
31
from StringIO import StringIO
1430 by Robert Collins
touchup the prefixed-store patch
32
from stat import ST_SIZE
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
33
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
34
class CompressedTextStore(bzrlib.store.TransportStore):
    """Store that holds files indexed by unique names.

    Files can be added, but not modified once they are in.  Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Files are stored gzip compressed, with no delta compression.

    >>> st = ScratchCompressedTextStore()

    >>> st.add(StringIO('hello'), 'aa')
    >>> 'aa' in st
    True
    >>> 'foo' in st
    False

    You are not allowed to add an id that is already present.

    Entries can be retrieved as files, which may then be read.

    >>> st.add(StringIO('goodbye'), '123123')
    >>> st['123123'].read()
    'goodbye'
    """

    def __init__(self, transport, prefixed=False):
        """Create a store that reads and writes through ``transport``.

        :param transport: Transport object used for all storage access.
        :param prefixed: If true, each entry lives below the directory
            named by hash_prefix(fileid) instead of directly at the top
            of the transport.
        """
        super(CompressedTextStore, self).__init__(transport)
        self._prefixed = prefixed

    def _check_fileid(self, fileid):
        """Raise ValueError if fileid contains a path separator."""
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % fileid)

    def _relpath(self, fileid):
        """Return the transport-relative path that holds fileid.

        :raises ValueError: if fileid contains a path separator.
        """
        self._check_fileid(fileid)
        if self._prefixed:
            return hash_prefix(fileid) + fileid + ".gz"
        else:
            return fileid + ".gz"

    def _open_gzipped(self, f):
        """Wrap the raw (compressed) file f in a decompressing GzipFile.

        gzip.GzipFile.read() requires a tell() function, but some
        transports return objects that cannot seek, so those are
        buffered in a StringIO first.
        """
        if not hasattr(f, 'tell'):
            from cStringIO import StringIO
            f = StringIO(f.read())
        return gzip.GzipFile(mode='rb', fileobj=f)

    def add(self, f, fileid):
        """Add contents of a file into the store.

        :param f: An open file or file-like object, or a string holding
            the content itself.
        :param fileid: Name to store the content under.
        :raises ValueError: if fileid contains a path separator.
        :raises BzrError: if the store already contains fileid.
        """
        # TODO: implement an add_multi which can do some of its
        #       own pipelining, and possibly take advantage of
        #       transport.put_multi(). The problem is that
        #       entries potentially need to be compressed as they
        #       are received, which implies translation, which
        #       means it isn't as straightforward as we would like.
        from cStringIO import StringIO
        from bzrlib.osutils import pumpfile

        mutter("add store entry %r" % (fileid,))
        if isinstance(f, basestring):
            # From here on f is always file-like.
            f = StringIO(f)

        fn = self._relpath(fileid)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

        if self._prefixed:
            # The prefix directory may already exist from an earlier add.
            try:
                self._transport.mkdir(hash_prefix(fileid))
            except FileExists:
                pass

        sio = StringIO()
        gf = gzip.GzipFile(mode='wb', fileobj=sio)
        # if pumpfile handles files that don't fit in ram,
        # so will this function
        pumpfile(f, gf)
        gf.close()
        # Rewind so the transport reads the compressed bytes from the start.
        sio.seek(0)
        self._transport.put(fn, sio)

    def _do_copy(self, other, to_copy, pb, permit_failure=False):
        """Copy the entries named in to_copy from other into this store.

        The raw-bytes fast path is only taken when other is a
        CompressedTextStore using the same path layout; otherwise the
        paths computed by this store would not exist on other's
        transport, so the base class implementation is used instead.
        """
        if (isinstance(other, CompressedTextStore)
                and other._prefixed == self._prefixed):
            return self._copy_multi_text(other, to_copy, pb,
                    permit_failure=permit_failure)
        return super(CompressedTextStore, self)._do_copy(other, to_copy,
                pb, permit_failure=permit_failure)

    def _copy_multi_text(self, other, to_copy, pb,
            permit_failure=False):
        """Copy already-compressed entries straight from other's transport.

        :param other: Source CompressedTextStore.
        :param to_copy: File ids to copy.
        :param pb: Progress bar passed through to the transport.
        :param permit_failure: If true, ids missing from other are
            skipped and reported instead of being requested.
        :return: (count, failed) -- the number of files copied and the
            set of ids that were skipped.
        """
        # Because of _transport, we can no longer assume
        # that they are on the same filesystem, we can, however
        # assume that we only need to copy the exact bytes,
        # we don't need to process the files.
        # NOTE(review): in prefixed mode nothing here creates the
        # hash_prefix directories on the destination -- confirm that
        # copy_to copes with that.

        failed = set()
        if permit_failure:
            new_to_copy = set()
            for fileid, has in zip(to_copy, other.has(to_copy)):
                if has:
                    new_to_copy.add(fileid)
                else:
                    failed.add(fileid)
            to_copy = new_to_copy

        paths = [self._relpath(fileid) for fileid in to_copy]
        count = other._transport.copy_to(paths, self._transport, pb=pb)
        # Compare against what was actually handed to the transport.
        assert count == len(paths)
        return count, failed

    def __contains__(self, fileid):
        """Return whether an entry for fileid exists on the transport."""
        fn = self._relpath(fileid)
        return self._transport.has(fn)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :return: A generator or list returning True/False for each entry.
        """
        relpaths = (self._relpath(fid) for fid in fileids)
        return self._transport.has_multi(relpaths, pb=pb)

    def get(self, fileids, permit_failure=False, pb=None):
        """Yield a readable file for each requested entry, in order.

        :param fileids: Iterable of file ids; an id may appear more
            than once.
        :param permit_failure: If true, ids that the transport reports
            as absent are not requested and None is yielded in their
            place.
        :param pb: Progress bar passed through to the transport.
        :return: A generator of gzip.GzipFile objects (or None, see
            permit_failure), one per entry of fileids.

        TODO: Write some tests to make sure that permit_failure is
              handled correctly.

        TODO: What should the exception be for a missing file?
              KeyError, or NoSuchFile?
        """
        # This next code gets a bit hairy because it can allow
        # to not request a file which doesn't seem to exist.
        # Also, the same fileid may be requested twice, so we
        # can't just build up a map.
        rel_paths = [self._relpath(fid) for fid in fileids]
        is_requested = []

        if permit_failure:
            existing_paths = []
            for path, has in zip(rel_paths,
                    self._transport.has_multi(rel_paths)):
                if has:
                    existing_paths.append(path)
                    is_requested.append(True)
                else:
                    is_requested.append(False)
        else:
            existing_paths = rel_paths
            is_requested = [True for x in rel_paths]

        # count indexes is_requested, i.e. the caller's original order.
        count = 0
        for f in self._transport.get_multi(existing_paths, pb=pb):
            assert count < len(is_requested)
            # Emit placeholders for skipped ids preceding this file.
            while not is_requested[count]:
                yield None
                count += 1
            yield self._open_gzipped(f)
            count += 1

        # Placeholders for skipped ids after the last retrieved file.
        while count < len(is_requested):
            yield None
            count += 1

    def __iter__(self):
        """Yield the file id of every entry found on the transport."""
        for relpath, st in self._iter_relpaths():
            if relpath.endswith(".gz"):
                yield os.path.basename(relpath)[:-3]
            else:
                yield os.path.basename(relpath)

    def __len__(self):
        """Return the number of entries found on the transport."""
        # Uses the same _iter_relpaths() iterator as __iter__ and
        # total_size.
        return len(list(self._iter_relpaths()))

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry."""
        f = super(CompressedTextStore, self).__getitem__(fileid)
        return self._open_gzipped(f)

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content."""
        total = 0
        count = 0
        for relpath, st in self._iter_relpaths():
            count += 1
            total += st[ST_SIZE]

        return count, total
244
1092.2.24 by Robert Collins
merge from martins newformat branch - brings in transport abstraction
245
1185.11.1 by John Arbash Meinel
(broken) Transport work is merged in. Tests do not pass yet.
246
class ScratchCompressedTextStore(CompressedTextStore):
    """Self-destructing test subclass of CompressedTextStore.

    The Store only exists for the lifetime of the Python object.
    Obviously you should not put anything precious in it.
    """
    def __init__(self):
        """Create a non-prefixed store in a fresh temporary directory."""
        # Absolute import, matching the other bzrlib imports in this file.
        from bzrlib.transport import transport
        t = transport(tempfile.mkdtemp())
        super(ScratchCompressedTextStore, self).__init__(t)

    def __del__(self):
        """Delete every stored entry and remove the temporary directory."""
        # __del__ also runs when __init__ failed part-way; in that case
        # there is no transport and nothing to clean up.
        t = getattr(self, '_transport', None)
        if t is None:
            return
        t.delete_multi(t.list_dir('.'))
        os.rmdir(t.base)
        mutter("%r destroyed" % self)
261