1
# Copyright (C) 2005 by Canonical Development Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
An implementation of the primary storage type CompressedTextStore.
20
This store keeps compressed versions of the full text. It does not
21
do any sort of delta compression.
24
import os, tempfile, gzip
27
from bzrlib.trace import mutter
28
from bzrlib.errors import BzrError, FileExists
30
from StringIO import StringIO
32
class CompressedTextStore(bzrlib.store.TransportStore):
33
"""Store that holds files indexed by unique names.
35
Files can be added, but not modified once they are in. Typically
36
the hash is used as the name, or something else known to be unique,
39
Files are stored gzip compressed, with no delta compression.
41
>>> st = ScratchCompressedTextStore()
43
>>> st.add(StringIO('hello'), 'aa')
49
You are not allowed to add an id that is already present.
51
Entries can be retrieved as files, which may then be read.
53
>>> st.add(StringIO('goodbye'), '123123')
54
>>> st.get('123123').read()
58
def _relpath(self, fileid, suffixes=[]):
    """Return the parent class's relative path for fileid with 'gz' added.

    The suffix list passed in is left untouched; a new list ending in
    'gz' is built and handed to the parent class's _relpath.
    """
    gz_suffixes = suffixes + ['gz']
    parent = super(CompressedTextStore, self)
    return parent._relpath(fileid, gz_suffixes)
62
# NOTE(review): this method appears truncated in this view -- `sio` is used
# below but never assigned, both `if` statements have no visible body, and the
# bare integers between statements look like extraction residue. The comments
# here describe only the visible lines; confirm against the complete file.
def _add(self, fn, f):
63
from cStringIO import StringIO
64
from bzrlib.osutils import pumpfile
66
if isinstance(f, basestring):
70
# Gzip writer whose compressed output goes into the `sio` file object.
gf = gzip.GzipFile(mode='wb', fileobj=sio)
71
# if pumpfile handles files that don't fit in ram,
72
# so will this function
73
if isinstance(f, basestring):
79
# Hand the `sio` object to the transport under the name `fn`.
self._transport.put(fn, sio)
81
def _do_copy(self, other, to_copy, pb, permit_failure=False):
    """Copy the entries in to_copy from other into this store.

    When other is also a CompressedTextStore the work is handed to
    _copy_multi_text; for any other store type the parent class's
    _do_copy is used instead. The result of whichever call is made is
    returned unchanged.
    """
    if not isinstance(other, CompressedTextStore):
        parent = super(CompressedTextStore, self)
        return parent._do_copy(other, to_copy, pb,
                               permit_failure=permit_failure)
    return self._copy_multi_text(other, to_copy, pb,
                                 permit_failure=permit_failure)
88
# NOTE(review): this method appears truncated in this view -- `new_to_copy`
# is used without a visible assignment, the `for` loop binds `has` but no
# visible line tests it, and no return statement is shown. The bare integers
# between statements look like extraction residue. Confirm against the
# complete file before relying on these notes.
def _copy_multi_text(self, other, to_copy, pb,
89
permit_failure=False):
90
# Because of _transport, we can no longer assume
91
# that they are on the same filesystem, we can, however
92
# assume that we only need to copy the exact bytes,
93
# we don't need to process the files.
98
# Pair each requested id with the matching result of other.has(to_copy).
for fileid, has in zip(to_copy, other.has(to_copy)):
100
new_to_copy.add(fileid)
103
to_copy = new_to_copy
104
#mutter('_copy_multi_text copying %s, failed %s' % (to_copy, failed))
106
# Paths are computed with this store's _relpath, then copied from the
# other store's transport into this store's transport.
paths = [self._relpath(fileid) for fileid in to_copy]
107
count = other._transport.copy_to(paths, self._transport, pb=pb)
108
# copy_to's result is expected to equal the number of ids requested.
assert count == len(to_copy)
111
def __init__(self, transport, prefixed=False):
    """Initialise the parent store, then register the 'gz' suffix.

    transport and prefixed are passed straight through to the parent
    class's __init__.
    """
    parent = super(CompressedTextStore, self)
    parent.__init__(transport, prefixed)
    self.register_suffix('gz')
116
# NOTE(review): no `def` header is visible above this loop, and the bare
# integers between statements look like extraction residue rather than code.
# Yields one name per path reported by the transport's recursive file walk.
for relpath in self._transport.iter_files_recursive():
117
# Paths ending in ".gz": yield the basename minus that 3-character extension.
if relpath.endswith(".gz"):
118
yield os.path.basename(relpath)[:-3]
120
# NOTE(review): this second yield is presumably inside an `else:` branch that
# is not visible here (embedded numbering skips 119) -- confirm.
yield os.path.basename(relpath)
122
def _get(self, filename):
    """Open the entry stored at filename and return a decompressing reader.

    The raw file object comes from the transport; the return value is a
    gzip.GzipFile opened in 'rb' mode on top of it.
    """
    source = self._transport.get(filename)
    # gzip.GzipFile.read() requires a tell() function, but some transports
    # return objects that cannot seek, so those are read completely into
    # an in-memory StringIO before being wrapped.
    if not hasattr(source, 'tell'):
        from cStringIO import StringIO
        source = StringIO(source.read())
    return gzip.GzipFile(mode='rb', fileobj=source)
136
def ScratchTextStore():
    """Return a TextStore built on a newly created ScratchTransport."""
    # NOTE(review): the CompressedTextStore doctest calls
    # ScratchCompressedTextStore(), and neither TextStore nor
    # ScratchTransport is imported in the visible part of this file --
    # confirm this factory is the one intended for this module.
    return TextStore(ScratchTransport())