~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/groupcompress.py

Committer: John Arbash Meinel
Date: 2010-09-21 19:30:33 UTC
mto: This revision was merged to the branch mainline in revision 5451.
Revision ID: john@arbash-meinel.com-20100921193033-9ftw56og72mhlwo4

Change GroupCompressBlock to work in self._z_content_chunks

This pushes down one of the peak memory locations. We still have a requirement
during commit of 1 fulltext + 2 compressed texts, but at least this code
path is now better about only using 1 fulltext and 1 compressed text.
We need to push this into more APIs to get a bigger benefit.

files added:
bzrlib/controldir.py

bzrlib/doc_generate/builders

bzrlib/doc_generate/builders/__init__.py

bzrlib/doc_generate/builders/texinfo.py

bzrlib/doc_generate/writers

bzrlib/doc_generate/writers/__init__.py

bzrlib/doc_generate/writers/texinfo.py

bzrlib/library_state.py

bzrlib/plugins/bash_completion

bzrlib/plugins/bash_completion/README.txt

bzrlib/plugins/bash_completion/__init__.py

bzrlib/plugins/bash_completion/bashcomp.py

bzrlib/plugins/bash_completion/tests

bzrlib/plugins/bash_completion/tests/__init__.py

bzrlib/plugins/bash_completion/tests/test_bashcomp.py

bzrlib/recordcounter.py

bzrlib/tests/blackbox/test_deleted.py

bzrlib/tests/blackbox/test_lookup_revision.py

bzrlib/tests/doc_generate

bzrlib/tests/doc_generate/__init__.py

bzrlib/tests/doc_generate/builders

bzrlib/tests/doc_generate/builders/__init__.py

bzrlib/tests/doc_generate/builders/test_texinfo.py

bzrlib/tests/doc_generate/writers

bzrlib/tests/doc_generate/writers/__init__.py

bzrlib/tests/doc_generate/writers/test_texinfo.py

bzrlib/tests/fixtures.py

bzrlib/tests/per_branch/test_config.py

bzrlib/tests/per_bzrdir

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interbranch/test_copy_content_into.py

bzrlib/tests/per_interbranch/test_get.py

bzrlib/tests/per_workingtree/test_symlinks.py

bzrlib/tests/test__btree_serializer.py

bzrlib/tests/test_fixtures.py

bzrlib/tests/test_library_state.py

bzrlib/tests/test_test_server.py

bzrlib/tests/test_treeshape.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/testui.py

bzrlib/transport/gio_transport.py

contrib/bash/bzr

contrib/zsh/README

doc/developers/code-review.txt

doc/developers/code-style.txt

doc/developers/transports.txt

doc/developers/ui.txt

doc/en/whats-new/whats-new-in-2.3.txt

tools/win32/py2exe_boot_common.py

files removed:
bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_tags.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/zsh/_bzr

files renamed:
bzrlib/doc_generate/sphinx_conf.py => bzrlib/doc_generate/conf.py

bzrlib/tests/per_bzrdir/ => bzrlib/tests/per_controldir/

bzrlib/tests/per_bzrdir/test_bzrdir.py => bzrlib/tests/per_controldir/test_controldir.py

bzrlib/tests/per_bzrdir_colo/ => bzrlib/tests/per_controldir_colo/

files modified:
INSTALL

Makefile

NEWS

bzrlib/__init__.py

bzrlib/_annotator_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_pyx.pyx

bzrlib/_readdir_pyx.pyx

bzrlib/_simple_set_pyx.pyx

bzrlib/_static_tuple_c.c

bzrlib/_static_tuple_c.h

bzrlib/_static_tuple_c.pxd

bzrlib/add.py

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/chk_map.py

bzrlib/clean_tree.py

bzrlib/cleanup.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/crash.py

bzrlib/diff-delta.c

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/filters/eol.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/groupcompress.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflict-types.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/index.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lock.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_propose.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/python-compat.h

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/shelf_ui.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/status.py

bzrlib/store/text.py

bzrlib/strace.py

bzrlib/switch.py

bzrlib/symbol_versioning.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_clean_tree.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_rmbranch.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/features.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_controldir/__init__.py

bzrlib/tests/per_controldir/test_push.py

bzrlib/tests/per_controldir_colo/__init__.py

bzrlib/tests/per_controldir_colo/test_supported.py

bzrlib/tests/per_controldir_colo/test_unsupported.py

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_uifactory/__init__.py

bzrlib/tests/per_versionedfile.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/script.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__bencode.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__simple_set.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_cleanup.py

bzrlib/tests/test_cmdline.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_config.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_http.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_script.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_server.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/treeshape.py

bzrlib/textmerge.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/local.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/urlutils.py

bzrlib/util/_bencode_py.py

bzrlib/version_info_formats/__init__.py

bzrlib/versionedfile.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml_serializer.py

doc/developers/HACKING.txt

doc/developers/bug-handling.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/conf.py

doc/developers/content-filtering.txt

doc/developers/contribution-quickstart.txt

doc/developers/development-repo.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/integration.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/testing.txt

doc/en/Makefile

doc/en/_templates/index.html

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/hooks-plugins.txt

doc/en/admin-guide/introduction.txt

doc/en/admin-guide/migration.txt

doc/en/admin-guide/simple-setups.txt

doc/en/admin-guide/upgrade.txt

doc/en/conf.py

doc/en/index.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide/overview.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-reference/readme.txt

doc/en/whats-new/whats-new-in-2.2.txt

doc/es/conf.py

doc/es/index.txt

doc/es/mini-tutorial/index.txt

doc/index.es.txt

doc/index.ja.txt

doc/index.ru.txt

doc/index.txt

doc/ja/conf.py

doc/ja/index.txt

doc/ja/mini-tutorial/index.txt

doc/ja/tutorials/centralized_workflow.txt

doc/ja/tutorials/tutorial.txt

doc/ja/tutorials/using_bazaar_with_launchpad.txt

doc/ja/upgrade-guide/overview.txt

doc/ja/user-guide/bzrtools_plugin.txt

doc/ja/user-guide/installing_bazaar.txt

doc/ja/user-guide/introducing_bazaar.txt

doc/ja/user-guide/plugins.txt

doc/ja/user-guide/server.txt

doc/ja/user-guide/shared_repository_layouts.txt

doc/ja/user-guide/svn_plugin.txt

doc/ja/user-guide/web_browsing.txt

doc/ja/user-reference/index.txt

doc/ru/conf.py

doc/ru/index.txt

doc/ru/mini-tutorial/index.txt

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide/introducing_bazaar.txt

setup.py

tools/check-newsbugs.py

tools/generate_release_notes.py

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

101

def __init__(self):

102

# map by key? or just order in file?

103

self._compressor_name = None

104

self._z_content = None

104

self._z_content_chunks = None

105

self._z_content_decompressor = None

106

self._z_content_length = None

107

self._content_length = None

135

self._content = ''.join(self._content_chunks)

136

self._content_chunks = None

137

if self._content is None:

138

if self._z_content is None:

138

# We join self._z_content_chunks here, because if we are

139

# decompressing, then it is *very* likely that we have a single

140

# chunk

141

if self._z_content_chunks is None:

139

142

raise AssertionError('No content to decompress')

140

if self._z_content == '':

143

z_content = ''.join(self._z_content_chunks)

144

if z_content == '':

141

145

self._content = ''

142

146

elif self._compressor_name == 'lzma':

143

147

# We don't do partial lzma decomp yet

144

self._content = pylzma.decompress(self._z_content)

148

self._content = pylzma.decompress(z_content)

145

149

elif self._compressor_name == 'zlib':

146

150

# Start a zlib decompressor

147

151

if num_bytes * 4 > self._content_length * 3:

148

152

# If we are requesting more than 3/4ths of the content,

149

153

# just extract the whole thing in a single pass

150

154

num_bytes = self._content_length

151

self._content = zlib.decompress(self._z_content)

155

self._content = zlib.decompress(z_content)

152

156

else:

153

157

self._z_content_decompressor = zlib.decompressobj()

154

158

# Seed the decompressor with the uncompressed bytes, so

155

159

# that the rest of the code is simplified

156

160

self._content = self._z_content_decompressor.decompress(

157

self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

161

z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

158

162

if not self._z_content_decompressor.unconsumed_tail:

159

163

self._z_content_decompressor = None

160

164

else:

207

211

# XXX: Define some GCCorrupt error ?

208

212

raise AssertionError('Invalid bytes: (%d) != %d + %d' %

209

213

(len(bytes), pos, self._z_content_length))

210

self._z_content = bytes[pos:]

214

self._z_content_chunks = (bytes[pos:],)

215

216

@property

217

def _z_content(self):

218

if self._z_content_chunks is not None:

219

return ''.join(self._z_content_chunks)

220

return None

211

221

212

222

@classmethod

213

223

def from_bytes(cls, bytes):

269

279

self._content_length = length

270

280

self._content_chunks = content_chunks

271

281

self._content = None

272

self._z_content = None

282

self._z_content_chunks = None

273

283

274

284

def set_content(self, content):

275

285

"""Set the content of this block."""

276

286

self._content_length = len(content)

277

287

self._content = content

278

self._z_content = None

288

self._z_content_chunks = None

279

289

280

290

def _create_z_content_using_lzma(self):

281

291

if self._content_chunks is not None:

283

293

self._content_chunks = None

284

294

if self._content is None:

285

295

raise AssertionError('Nothing to compress')

286

self._z_content = pylzma.compress(self._content)

287

self._z_content_length = len(self._z_content)

296

z_content = pylzma.compress(self._content)

297

self._z_content_chunks = (z_content,)

298

self._z_content_length = len(z_content)

288

299

289

def _create_z_content_from_chunks(self):

300

def _create_z_content_from_chunks(self, chunks):

290

301

compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)

291

compressed_chunks = map(compressor.compress, self._content_chunks)

302

# Peak at this point is 1 fulltext, 1 compressed text, + zlib overhead

303

# (measured peak is maybe 30MB over the above...)

304

compressed_chunks = map(compressor.compress, chunks)

292

305

compressed_chunks.append(compressor.flush())

293

self._z_content = ''.join(compressed_chunks)

294

self._z_content_length = len(self._z_content)

306

# Ignore empty chunks

307

self._z_content_chunks = [c for c in compressed_chunks if c]

308

self._z_content_length = sum(map(len, self._z_content_chunks))

295

309

296

310

def _create_z_content(self):

297

if self._z_content is not None:

311

if self._z_content_chunks is not None:

298

312

return

299

313

if _USE_LZMA:

300

314

self._create_z_content_using_lzma()

301

315

return

302

316

if self._content_chunks is not None:

303

self._create_z_content_from_chunks()

304

return

305

self._z_content = zlib.compress(self._content)

306

self._z_content_length = len(self._z_content)

317

chunks = self._content_chunks

318

else:

319

chunks = (self._content,)

320

self._create_z_content_from_chunks(chunks)

307

321

308

def to_bytes(self):

309

"""Encode the information into a byte stream."""

322

def to_chunks(self):

323

"""Create the byte stream as a series of 'chunks'"""

310

324

self._create_z_content()

311

325

if _USE_LZMA:

312

326

header = self.GCB_LZ_HEADER

313

327

else:

314

328

header = self.GCB_HEADER

315

chunks = [header,

316

'%d\n%d\n' % (self._z_content_length, self._content_length),

317

self._z_content,

329

chunks = ['%s%d\n%d\n'

330

% (header, self._z_content_length, self._content_length),

318

331

]

332

chunks.extend(self._z_content_chunks)

333

total_len = sum(map(len, chunks))

334

return total_len, chunks

335

336

def to_bytes(self):

337

"""Encode the information into a byte stream."""

338

total_len, chunks = self.to_chunks()

319

339

return ''.join(chunks)

320

340

321

341

def _dump(self, include_text=False):

679

699

z_header_bytes = zlib.compress(header_bytes)

680

700

del header_bytes

681

701

z_header_bytes_len = len(z_header_bytes)

682

block_bytes = self._block.to_bytes()

702

block_bytes_len, block_chunks = self._block.to_chunks()

683

703

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

684

len(block_bytes)))

704

block_bytes_len))

685

705

lines.append(z_header_bytes)

686

lines.append(block_bytes)

687

del z_header_bytes, block_bytes

706

lines.extend(block_chunks)

707

del z_header_bytes, block_chunks

708

# TODO: This is a point where we will double the memory consumption. To

709

# avoid this, we probably have to switch to a 'chunked' api

688

710

return ''.join(lines)

689

711

690

712

@classmethod

691

713

def from_bytes(cls, bytes):

692

714

# TODO: This does extra string copying, probably better to do it a

693

# different way

715

# different way. At a minimum this creates 2 copies of the

716

# compressed content

694

717

(storage_kind, z_header_len, header_len,

695

718

block_len, rest) = bytes.split('\n', 4)

696

719

del bytes

854

877

855

878

After calling this, the compressor should no longer be used

856

879

"""

857

# TODO: this causes us to 'bloat' to 2x the size of content in the

858

# group. This has an impact for 'commit' of large objects.

859

# One possibility is to use self._content_chunks, and be lazy and

860

# only fill out self._content as a full string when we actually

861

# need it. That would at least drop the peak memory consumption

862

# for 'commit' down to ~1x the size of the largest file, at a

863

# cost of increased complexity within this code. 2x is still <<

864

# 3x the size of the largest file, so we are doing ok.

865

880

self._block.set_chunked_content(self.chunks, self.endpoint)

866

881

self.chunks = None

867

882

self._delta_index = None

1630

1645

self._unadded_refs = {}

1631

1646

keys_to_add = []

1632

1647

def flush():

1633

bytes = self._compressor.flush().to_bytes()

1648

bytes_len, chunks = self._compressor.flush().to_chunks()

1634

1649

self._compressor = GroupCompressor()

1650

# Note: At this point we still have 1 copy of the fulltext (in

1651

# record and the var 'bytes'), and this generates 2 copies of

1652

# the compressed text (one for bytes, one in chunks)

1653

# TODO: Push 'chunks' down into the _access api, so that we don't

1654

# have to double compressed memory here

1655

# TODO: Figure out how to indicate that we would be happy to free

1656

# the fulltext content at this point. Note that sometimes we

1657

# will want it later (streaming CHK pages), but most of the

1658

# time we won't (everything else)

1659

bytes = ''.join(chunks)

1660

del chunks

1635

1661

index, start, length = self._access.add_raw_records(

1636

1662

[(None, len(bytes))], bytes)[0]

1637

1663

nodes = []

1809

1835

return result

1810

1836

1811

1837

1838

class _GCBuildDetails(object):

1839

"""A blob of data about the build details.

1840

1841

This stores the minimal data, which then allows compatibility with the old

1842

api, without taking as much memory.

1843

"""

1844

1845

__slots__ = ('_index', '_group_start', '_group_end', '_basis_end',

1846

'_delta_end', '_parents')

1847

1848

method = 'group'

1849

compression_parent = None

1850

1851

def __init__(self, parents, position_info):

1852

self._parents = parents

1853

(self._index, self._group_start, self._group_end, self._basis_end,

1854

self._delta_end) = position_info

1855

1856

def __repr__(self):

1857

return '%s(%s, %s)' % (self.__class__.__name__,

1858

self.index_memo, self._parents)

1859

1860

@property

1861

def index_memo(self):

1862

return (self._index, self._group_start, self._group_end,

1863

self._basis_end, self._delta_end)

1864

1865

@property

1866

def record_details(self):

1867

return static_tuple.StaticTuple(self.method, None)

1868

1869

def __getitem__(self, offset):

1870

"""Compatibility thunk to act like a tuple."""

1871

if offset == 0:

1872

return self.index_memo

1873

elif offset == 1:

1874

return self.compression_parent # Always None

1875

elif offset == 2:

1876

return self._parents

1877

elif offset == 3:

1878

return self.record_details

1879

else:

1880

raise IndexError('offset out of range')

1881

1882

def __len__(self):

1883

return 4

1884

1885

1812

1886

class _GCGraphIndex(object):

1813

1887

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1814

1888

2009

2083

parents = None

2010

2084

else:

2011

2085

parents = entry[3][0]

2012

method = 'group'

2013

result[key] = (self._node_to_position(entry),

2014

None, parents, (method, None))

2086

details = _GCBuildDetails(parents, self._node_to_position(entry))

2087

result[key] = details

2015

2088

return result

2016

2089

2017

2090

def keys(self):

2033

2106

# each, or about 7MB. Note that it might be even more when you consider

2034

2107

# how PyInt is allocated in separate slabs. And you can't return a slab

2035

2108

# to the OS if even 1 int on it is in use. Note though that Python uses

2036

# a LIFO when re-using PyInt slots, which probably causes more

2109

# a LIFO when re-using PyInt slots, which might cause more

2037

2110

# fragmentation.

2038

2111

start = int(bits[0])

2039

2112

start = self._int_cache.setdefault(start, start)

Older »