~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/store/compressed_text.py

Committer: Canonical.com Patch Queue Manager
Date: 2006-02-18 02:33:47 UTC
mfrom: (1534.1.24 integration)
Revision ID: pqm@pqm.ubuntu.com-20060218023347-0952c65f668bfd68

Merge Robert Collins integration.

files added:
BRANCH.TODO

INSTALL

bzrlib/bzrdir.py

bzrlib/decorators.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/gpg.py

bzrlib/iterablefile.py

bzrlib/lockable_files.py

bzrlib/lsprof.py

bzrlib/option.py

bzrlib/repository.py

bzrlib/rio.py

bzrlib/sign_my_commits.py

bzrlib/symbol_versioning.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_api.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_http.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_reweave.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_uncommit.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transport/ftp.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tsort.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/win32console.py

generate_docs.py

tools/__init__.py

tools/biobench.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/riodemo.py

tools/trace-revisions

files removed:
bzrlib/clone.py

bzrlib/mdiff.py

bzrlib/revfile.py

bzrlib/store/compressed_text.py

patches/cache-remote-revisions.diff

patches/cache_weave_inclusions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

testbzr

files renamed:
bzrlib/changeset.py => bzrlib/_changeset.py

bzrlib/merge_core.py => bzrlib/_merge_core.py

bzrlib/selftest/ => bzrlib/tests/

bzrlib/selftest/test_revision_info.py => bzrlib/tests/blackbox/test_revision_info.py

bzrlib/selftest/teststatus.py => bzrlib/tests/blackbox/test_status.py

bzrlib/selftest/blackbox.py => bzrlib/tests/blackbox/test_too_much.py

bzrlib/selftest/versioning.py => bzrlib/tests/blackbox/test_versioning.py

bzrlib/selftest/testbranch.py => bzrlib/tests/branch_implementations/test_branch.py

bzrlib/selftest/test_parent.py => bzrlib/tests/branch_implementations/test_parent.py

bzrlib/selftest/testannotate.py => bzrlib/tests/test_annotate.py

bzrlib/selftest/testconfig.py => bzrlib/tests/test_config.py

bzrlib/selftest/testdiff.py => bzrlib/tests/test_diff.py

bzrlib/selftest/testfetch.py => bzrlib/tests/test_fetch.py

bzrlib/selftest/testgraph.py => bzrlib/tests/test_graph.py

bzrlib/selftest/testhashcache.py => bzrlib/tests/test_hashcache.py

bzrlib/selftest/testidentitymap.py => bzrlib/tests/test_identitymap.py

bzrlib/selftest/testinv.py => bzrlib/tests/test_inv.py

bzrlib/selftest/testlog.py => bzrlib/tests/test_log.py

bzrlib/selftest/testmerge.py => bzrlib/tests/test_merge.py

bzrlib/selftest/testmerge3.py => bzrlib/tests/test_merge3.py

bzrlib/selftest/plugins.py => bzrlib/tests/test_plugins.py

bzrlib/selftest/testrevision.py => bzrlib/tests/test_revision.py

bzrlib/selftest/testrevisionnamespaces.py => bzrlib/tests/test_revisionnamespaces.py

bzrlib/selftest/testrevprops.py => bzrlib/tests/test_revprops.py

bzrlib/selftest/testsampler.py => bzrlib/tests/test_sampler.py

bzrlib/selftest/teststore.py => bzrlib/tests/test_store.py

bzrlib/selftest/testtestament.py => bzrlib/tests/test_testament.py

bzrlib/selftest/testtransactions.py => bzrlib/tests/test_transactions.py

bzrlib/selftest/testtransport.py => bzrlib/tests/test_transport.py

bzrlib/selftest/whitebox.py => bzrlib/tests/test_whitebox.py

bzrlib/selftest/testworkingtree.py => bzrlib/tests/test_workingtree.py

bzrlib/ui.py => bzrlib/ui/__init__.py

bzrlib/xml.py => bzrlib/xml_serializer.py

bzr-man.py => tools/doc_generate/autodoc_man.py

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

build-api

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/text.py

bzrlib/store/weave.py

bzrlib/testament.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/trace.py

bzrlib/transport/__init__.py

bzrlib/transport/http.py

bzrlib/transport/local.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

contrib/zsh/_bzr

doc/random.txt

setup.py *

tutorial.txt

Show diffs side-by-side

added added

removed removed

bzrlib/store/compressed_text.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""

An implementation of the primary storage type CompressedTextStore.

This store keeps compressed versions of the full text. It does not

do any sort of delta compression.

"""

import os, tempfile, gzip

import bzrlib.store

from bzrlib.store import hash_prefix

from bzrlib.trace import mutter

from bzrlib.errors import BzrError, FileExists

from StringIO import StringIO

from stat import ST_SIZE

class CompressedTextStore(bzrlib.store.TransportStore):
    """Store that holds files indexed by unique names.

    Files can be added, but not modified once they are in.  Typically
    the hash is used as the name, or something else known to be unique,
    such as a UUID.

    Files are stored gzip compressed, with no delta compression.

    >>> st = ScratchCompressedTextStore()

    >>> st.add(StringIO('hello'), 'aa')
    >>> 'aa' in st
    True
    >>> 'foo' in st
    False

    You are not allowed to add an id that is already present.

    Entries can be retrieved as files, which may then be read.

    >>> st.add(StringIO('goodbye'), '123123')
    >>> st['123123'].read()
    'goodbye'
    """

    def __init__(self, transport, prefixed=False):
        """Create a store on top of transport.

        transport -- Passed straight through to the base class constructor.
        prefixed -- When true, entries are placed under the path returned
            by hash_prefix(fileid) instead of directly at the top level.
        """
        super(CompressedTextStore, self).__init__(transport)
        self._prefixed = prefixed

    def _check_fileid(self, fileid):
        """Raise ValueError if fileid contains a path separator."""
        if '\\' in fileid or '/' in fileid:
            raise ValueError("invalid store id %r" % fileid)

    def _relpath(self, fileid):
        """Return the transport-relative path used to store fileid.

        The id is validated first, so a ValueError is raised for ids
        containing '/' or '\\'.
        """
        self._check_fileid(fileid)
        if self._prefixed:
            return hash_prefix(fileid) + fileid + ".gz"
        else:
            return fileid + ".gz"

    def _gunzip_stream(self, f):
        """Wrap the raw (compressed) file object f in a reading GzipFile.

        gzip.GzipFile.read() requires a tell() function, but some
        transports return objects that cannot seek, so those are
        buffered into an in-memory StringIO first.
        """
        if hasattr(f, 'tell'):
            return gzip.GzipFile(mode='rb', fileobj=f)
        from cStringIO import StringIO
        return gzip.GzipFile(mode='rb', fileobj=StringIO(f.read()))

    def add(self, f, fileid):
        """Add contents of a file into the store.

        f -- An open file, or file-like object.  A plain string is also
            accepted; it is wrapped in a file-like object first.
        fileid -- Name to store the content under.

        Raises BzrError if the store already contains fileid, and
        ValueError (from _relpath) if fileid contains a path separator.
        """
        # TODO: implement an add_multi which can do some of its
        #       own pipelining, and possibly take advantage of
        #       transport.put_multi().  The problem is that
        #       entries potentially need to be compressed as they
        #       are received, which implies translation, which
        #       means it isn't as straightforward as we would like.
        from cStringIO import StringIO
        from bzrlib.osutils import pumpfile

        mutter("add store entry %r" % (fileid,))
        if isinstance(f, basestring):
            f = StringIO(f)

        fn = self._relpath(fileid)
        if self._transport.has(fn):
            raise BzrError("store %r already contains id %r" % (self._transport.base, fileid))

        if self._prefixed:
            try:
                self._transport.mkdir(hash_prefix(fileid))
            except FileExists:
                # The prefix directory is shared between entries; it
                # already existing is the normal case, not an error.
                pass

        sio = StringIO()
        gf = gzip.GzipFile(mode='wb', fileobj=sio)
        # f is always file-like at this point (strings were wrapped
        # above), so it can be pumped unconditionally.
        # if pumpfile handles files that don't fit in ram,
        # so will this function
        pumpfile(f, gf)
        gf.close()
        sio.seek(0)
        self._transport.put(fn, sio)

    def _do_copy(self, other, to_copy, pb, permit_failure=False):
        """Copy the entries named in to_copy from other into this store.

        When other is also a CompressedTextStore the stored bytes are
        copied as-is via _copy_multi_text; otherwise the base class
        implementation is used.
        """
        if isinstance(other, CompressedTextStore):
            return self._copy_multi_text(other, to_copy, pb,
                                         permit_failure=permit_failure)
        return super(CompressedTextStore, self)._do_copy(other, to_copy,
                pb, permit_failure=permit_failure)

    def _copy_multi_text(self, other, to_copy, pb,
                         permit_failure=False):
        """Copy already-compressed entries from other without recompressing.

        Returns a (count, failed) pair: the number of entries copied and
        the set of file ids that other did not have.  failed is only ever
        populated when permit_failure is true.
        """
        # Because of _transport, we can no longer assume
        # that they are on the same filesystem, we can, however
        # assume that we only need to copy the exact bytes,
        # we don't need to process the files.

        failed = set()
        if permit_failure:
            new_to_copy = set()
            for fileid, has in zip(to_copy, other.has(to_copy)):
                if has:
                    new_to_copy.add(fileid)
                else:
                    failed.add(fileid)
            to_copy = new_to_copy
            #mutter('_copy_multi_text copying %s, failed %s' % (to_copy, failed))

        paths = [self._relpath(fileid) for fileid in to_copy]
        count = other._transport.copy_to(paths, self._transport, pb=pb)
        assert count == len(to_copy)
        return count, failed

    def __contains__(self, fileid):
        """Return whether an entry for fileid exists on the transport."""
        fn = self._relpath(fileid)
        return self._transport.has(fn)

    def has(self, fileids, pb=None):
        """Return True/False for each entry in fileids.

        :param fileids: A List or generator yielding file ids.
        :return: A generator or list returning True/False for each entry.
        """
        relpaths = (self._relpath(fid) for fid in fileids)
        return self._transport.has_multi(relpaths, pb=pb)

    def get(self, fileids, permit_failure=False, pb=None):
        """Return a set of files, one for each requested entry.

        This is a generator: it yields one item per requested file id,
        in request order.  Each item is a readable GzipFile, or None for
        an entry found to be missing when permit_failure is true.

        TODO: Write some tests to make sure that permit_failure is
              handled correctly.

        TODO: What should the exception be for a missing file?
              KeyError, or NoSuchFile?
        """

        # This next code gets a bit hairy because it can allow
        # to not request a file which doesn't seem to exist.
        # Also, the same fileid may be requested twice, so we
        # can't just build up a map.
        rel_paths = [self._relpath(fid) for fid in fileids]
        is_requested = []

        #mutter('CompressedTextStore.get(permit_failure=%s)' % permit_failure)
        if permit_failure:
            existing_paths = []
            for path, has in zip(rel_paths,
                                 self._transport.has_multi(rel_paths)):
                if has:
                    existing_paths.append(path)
                    is_requested.append(True)
                else:
                    is_requested.append(False)
            #mutter('Retrieving %s out of %s' % (existing_paths, rel_paths))
        else:
            #mutter('Retrieving all %s' % (rel_paths, ))
            existing_paths = rel_paths
            is_requested = [True for x in rel_paths]

        count = 0
        for f in self._transport.get_multi(existing_paths, pb=pb):
            assert count < len(is_requested)
            # Emit a None placeholder for every skipped entry that
            # precedes the one this file belongs to.
            while not is_requested[count]:
                yield None
                count += 1
            yield self._gunzip_stream(f)
            count += 1

        # Placeholders for any skipped entries at the tail of the request.
        while count < len(is_requested):
            yield None
            count += 1

    def __iter__(self):
        """Yield the stored names, with any '.gz' suffix stripped."""
        for relpath, st in self._iter_relpaths():
            if relpath.endswith(".gz"):
                yield os.path.basename(relpath)[:-3]
            else:
                yield os.path.basename(relpath)

    def __len__(self):
        """Return the number of entries reported by _iter_relpaths()."""
        # Uses _iter_relpaths, the same helper __iter__ and total_size
        # rely on (this previously referred to '_iter_relpath').
        return len(list(self._iter_relpaths()))

    def __getitem__(self, fileid):
        """Returns a file reading from a particular entry."""
        f = super(CompressedTextStore, self).__getitem__(fileid)
        return self._gunzip_stream(f)

    def total_size(self):
        """Return (count, bytes)

        This is the (compressed) size stored on disk, not the size of
        the content."""
        total = 0
        count = 0
        for relpath, st in self._iter_relpaths():
            count += 1
            total += st[ST_SIZE]

        return count, total

244

245

246

class ScratchCompressedTextStore(CompressedTextStore):
    """Throwaway CompressedTextStore intended for tests.

    The store is backed by a freshly created temporary directory and is
    torn down again when the Python object is finalized, so nothing of
    value should ever be kept in it.
    """

    def __init__(self):
        """Create the store on a transport rooted at a new temp directory."""
        from transport import transport
        scratch_dir = tempfile.mkdtemp()
        super(ScratchCompressedTextStore, self).__init__(transport(scratch_dir))

    def __del__(self):
        """Delete the listed entries, then remove the base directory."""
        store_transport = self._transport
        leftovers = store_transport.list_dir('.')
        store_transport.delete_multi(leftovers)
        os.rmdir(store_transport.base)
        mutter("%r destroyed" % self)

261

Older »