~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Andrew Bennetts
Date: 2010-10-13 00:26:41 UTC
mto: This revision was merged to the branch mainline in revision 5498.
Revision ID: andrew.bennetts@canonical.com-20101013002641-9tlh9k89mlj1666m

Keep docs-plain working.

files added:
NEWS

bzrlib/controldir.py

bzrlib/doc_generate/builders

bzrlib/doc_generate/builders/__init__.py

bzrlib/doc_generate/builders/texinfo.py

bzrlib/doc_generate/writers

bzrlib/doc_generate/writers/__init__.py

bzrlib/doc_generate/writers/texinfo.py

bzrlib/library_state.py

bzrlib/tests/doc_generate

bzrlib/tests/doc_generate/__init__.py

bzrlib/tests/doc_generate/builders

bzrlib/tests/doc_generate/builders/__init__.py

bzrlib/tests/doc_generate/builders/test_texinfo.py

bzrlib/tests/doc_generate/writers

bzrlib/tests/doc_generate/writers/__init__.py

bzrlib/tests/doc_generate/writers/test_texinfo.py

bzrlib/tests/fixtures.py

bzrlib/tests/per_bzrdir

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_workingtree/test_symlinks.py

bzrlib/tests/test__btree_serializer.py

bzrlib/tests/test_fixtures.py

bzrlib/tests/test_library_state.py

bzrlib/tests/test_test_server.py

bzrlib/tests/test_treeshape.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/testui.py

doc/developers/transports.txt

doc/developers/ui.txt

doc/en/release-notes/bzr-0.1.txt

doc/en/release-notes/bzr-0.10.txt

doc/en/release-notes/bzr-0.11.txt

doc/en/release-notes/bzr-0.12.txt

doc/en/release-notes/bzr-0.13.txt

doc/en/release-notes/bzr-0.14.txt

doc/en/release-notes/bzr-0.15.txt

doc/en/release-notes/bzr-0.16.txt

doc/en/release-notes/bzr-0.17.txt

doc/en/release-notes/bzr-0.18.txt

doc/en/release-notes/bzr-0.6.txt

doc/en/release-notes/bzr-0.7.txt

doc/en/release-notes/bzr-0.8.txt

doc/en/release-notes/bzr-0.9.txt

doc/en/release-notes/bzr-0.90.txt

doc/en/release-notes/bzr-0.91.txt

doc/en/release-notes/bzr-0.92.txt

doc/en/release-notes/bzr-1.0.txt

doc/en/release-notes/bzr-1.1.txt

doc/en/release-notes/bzr-1.10.txt

doc/en/release-notes/bzr-1.11.txt

doc/en/release-notes/bzr-1.12.txt

doc/en/release-notes/bzr-1.13.txt

doc/en/release-notes/bzr-1.14.txt

doc/en/release-notes/bzr-1.15.txt

doc/en/release-notes/bzr-1.16.txt

doc/en/release-notes/bzr-1.17.txt

doc/en/release-notes/bzr-1.18.txt

doc/en/release-notes/bzr-1.2.txt

doc/en/release-notes/bzr-1.3.txt

doc/en/release-notes/bzr-1.4.txt

doc/en/release-notes/bzr-1.5.txt

doc/en/release-notes/bzr-1.6.txt

doc/en/release-notes/bzr-1.7.txt

doc/en/release-notes/bzr-1.8.txt

doc/en/release-notes/bzr-1.9.txt

doc/en/release-notes/bzr-2.0.txt

doc/en/release-notes/bzr-2.1.txt

doc/en/release-notes/bzr-2.2.txt

doc/en/whats-new/whats-new-in-2.3.txt

tools/win32/py2exe_boot_common.py

files removed:
bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_info.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_knit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_pack.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_startup.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_tags.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/many_commit.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

files renamed:
bzrlib/doc_generate/sphinx_conf.py => bzrlib/doc_generate/conf.py

bzrlib/tests/per_bzrdir/ => bzrlib/tests/per_controldir/

bzrlib/tests/per_bzrdir/test_bzrdir.py => bzrlib/tests/per_controldir/test_controldir.py

bzrlib/tests/per_bzrdir_colo/ => bzrlib/tests/per_controldir_colo/

NEWS-template.txt => doc/en/release-notes/NEWS-template.txt

NEWS => doc/en/release-notes/bzr-2.3.txt

files modified:
.bzrignore

Makefile

bzrlib/__init__.py

bzrlib/_annotator_py.py

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_groupcompress_pyx.pyx

bzrlib/_simple_set_pyx.pyx

bzrlib/_static_tuple_c.c

bzrlib/_static_tuple_c.pxd

bzrlib/add.py

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/breakin.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/chk_map.py

bzrlib/clean_tree.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/crash.py

bzrlib/diff-delta.c

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/filters/eol.py

bzrlib/generate_ids.py

bzrlib/globbing.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/hashcache.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflict-types.txt

bzrlib/help_topics/en/patterns.txt

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/inventory.py

bzrlib/lazy_import.py

bzrlib/lazy_regex.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/msgeditor.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/test_account.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/progress.py

bzrlib/python-compat.h

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/bzrdir.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/server.py

bzrlib/status.py

bzrlib/store/text.py

bzrlib/strace.py

bzrlib/symbol_versioning.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_deleted.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_rmbranch.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/features.py

bzrlib/tests/ftp_server/medusa_based.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_server.py

bzrlib/tests/http_utils.py

bzrlib/tests/https_server.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_bound_sftp.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_controldir/__init__.py

bzrlib/tests/per_controldir/test_push.py

bzrlib/tests/per_controldir_colo/__init__.py

bzrlib/tests/per_controldir_colo/test_supported.py

bzrlib/tests/per_controldir_colo/test_unsupported.py

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_has_same_location.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_uifactory/__init__.py

bzrlib/tests/per_versionedfile.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/script.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__bencode.py

bzrlib/tests/test__simple_set.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_clean_tree.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_config.py

bzrlib/tests/test_debug.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_globbing.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_http.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lazy_regex.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_osutils_encodings.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_script.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_server.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/treeshape.py

bzrlib/textmerge.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/gio_transport.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/local.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/version_info_formats/__init__.py

bzrlib/versionedfile.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml_serializer.py

doc/developers/HACKING.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/code-review.txt

doc/developers/code-style.txt

doc/developers/conf.py

doc/developers/content-filtering.txt

doc/developers/contribution-quickstart.txt

doc/developers/development-repo.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/overview.txt

doc/developers/packrepo.txt

doc/developers/planned-performance-changes.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/testing.txt

doc/en/Makefile

doc/en/_templates/index.html

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/hooks-plugins.txt

doc/en/admin-guide/migration.txt

doc/en/admin-guide/other-setups.txt

doc/en/admin-guide/simple-setups.txt

doc/en/admin-guide/upgrade.txt

doc/en/conf.py

doc/en/index.txt

doc/en/mini-tutorial/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide/overview.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/bzrtools_plugin.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/web_browsing.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/user-reference/readme.txt

doc/en/whats-new/whats-new-in-2.2.txt

doc/es/conf.py

doc/es/index.txt

doc/es/mini-tutorial/index.txt

doc/index.es.txt

doc/index.ja.txt

doc/index.ru.txt

doc/index.txt

doc/ja/conf.py

doc/ja/index.txt

doc/ja/mini-tutorial/index.txt

doc/ja/tutorials/centralized_workflow.txt

doc/ja/tutorials/tutorial.txt

doc/ja/tutorials/using_bazaar_with_launchpad.txt

doc/ja/upgrade-guide/overview.txt

doc/ja/user-guide/bzrtools_plugin.txt

doc/ja/user-guide/installing_bazaar.txt

doc/ja/user-guide/introducing_bazaar.txt

doc/ja/user-guide/plugins.txt

doc/ja/user-guide/server.txt

doc/ja/user-guide/shared_repository_layouts.txt

doc/ja/user-guide/svn_plugin.txt

doc/ja/user-guide/web_browsing.txt

doc/ja/user-reference/index.txt

doc/ru/conf.py

doc/ru/index.txt

doc/ru/mini-tutorial/index.txt

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide/introducing_bazaar.txt

setup.py

tools/check-newsbugs.py

tools/generate_release_notes.py

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

101

def __init__(self):

102

# map by key? or just order in file?

103

self._compressor_name = None

104

self._z_content = None

104

self._z_content_chunks = None

105

self._z_content_decompressor = None

106

self._z_content_length = None

107

self._content_length = None

135

self._content = ''.join(self._content_chunks)

136

self._content_chunks = None

137

if self._content is None:

138

if self._z_content is None:

138

# We join self._z_content_chunks here, because if we are

139

# decompressing, then it is *very* likely that we have a single

140

# chunk

141

if self._z_content_chunks is None:

139

142

raise AssertionError('No content to decompress')

140

if self._z_content == '':

143

z_content = ''.join(self._z_content_chunks)

144

if z_content == '':

141

145

self._content = ''

142

146

elif self._compressor_name == 'lzma':

143

147

# We don't do partial lzma decomp yet

144

self._content = pylzma.decompress(self._z_content)

148

self._content = pylzma.decompress(z_content)

145

149

elif self._compressor_name == 'zlib':

146

150

# Start a zlib decompressor

147

151

if num_bytes * 4 > self._content_length * 3:

148

152

# If we are requesting more than 3/4ths of the content,

149

153

# just extract the whole thing in a single pass

150

154

num_bytes = self._content_length

151

self._content = zlib.decompress(self._z_content)

155

self._content = zlib.decompress(z_content)

152

156

else:

153

157

self._z_content_decompressor = zlib.decompressobj()

154

158

# Seed the decompressor with the uncompressed bytes, so

155

159

# that the rest of the code is simplified

156

160

self._content = self._z_content_decompressor.decompress(

157

self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

161

z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

158

162

if not self._z_content_decompressor.unconsumed_tail:

159

163

self._z_content_decompressor = None

160

164

else:

207

211

# XXX: Define some GCCorrupt error ?

208

212

raise AssertionError('Invalid bytes: (%d) != %d + %d' %

209

213

(len(bytes), pos, self._z_content_length))

210

self._z_content = bytes[pos:]

214

self._z_content_chunks = (bytes[pos:],)

215

216

@property

217

def _z_content(self):

218

"""Return z_content_chunks as a simple string.

219

220

Meant only to be used by the test suite.

221

"""

222

if self._z_content_chunks is not None:

223

return ''.join(self._z_content_chunks)

224

return None

211

225

212

226

@classmethod

213

227

def from_bytes(cls, bytes):

269

283

self._content_length = length

270

284

self._content_chunks = content_chunks

271

285

self._content = None

272

self._z_content = None

286

self._z_content_chunks = None

273

287

274

288

def set_content(self, content):

275

289

"""Set the content of this block."""

276

290

self._content_length = len(content)

277

291

self._content = content

278

self._z_content = None

292

self._z_content_chunks = None

279

293

280

294

def _create_z_content_using_lzma(self):

281

295

if self._content_chunks is not None:

283

297

self._content_chunks = None

284

298

if self._content is None:

285

299

raise AssertionError('Nothing to compress')

286

self._z_content = pylzma.compress(self._content)

287

self._z_content_length = len(self._z_content)

300

z_content = pylzma.compress(self._content)

301

self._z_content_chunks = (z_content,)

302

self._z_content_length = len(z_content)

288

303

289

def _create_z_content_from_chunks(self):

304

def _create_z_content_from_chunks(self, chunks):

290

305

compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)

291

compressed_chunks = map(compressor.compress, self._content_chunks)

306

# Peak at this point is 1 fulltext, 1 compressed text, + zlib overhead

307

# (measured peak is maybe 30MB over the above...)

308

compressed_chunks = map(compressor.compress, chunks)

292

309

compressed_chunks.append(compressor.flush())

293

self._z_content = ''.join(compressed_chunks)

294

self._z_content_length = len(self._z_content)

310

# Ignore empty chunks

311

self._z_content_chunks = [c for c in compressed_chunks if c]

312

self._z_content_length = sum(map(len, self._z_content_chunks))

295

313

296

314

def _create_z_content(self):

297

if self._z_content is not None:

315

if self._z_content_chunks is not None:

298

316

return

299

317

if _USE_LZMA:

300

318

self._create_z_content_using_lzma()

301

319

return

302

320

if self._content_chunks is not None:

303

self._create_z_content_from_chunks()

304

return

305

self._z_content = zlib.compress(self._content)

306

self._z_content_length = len(self._z_content)

321

chunks = self._content_chunks

322

else:

323

chunks = (self._content,)

324

self._create_z_content_from_chunks(chunks)

307

325

308

def to_bytes(self):

309

"""Encode the information into a byte stream."""

326

def to_chunks(self):

327

"""Create the byte stream as a series of 'chunks'"""

310

328

self._create_z_content()

311

329

if _USE_LZMA:

312

330

header = self.GCB_LZ_HEADER

313

331

else:

314

332

header = self.GCB_HEADER

315

chunks = [header,

316

'%d\n%d\n' % (self._z_content_length, self._content_length),

317

self._z_content,

333

chunks = ['%s%d\n%d\n'

334

% (header, self._z_content_length, self._content_length),

318

335

]

336

chunks.extend(self._z_content_chunks)

337

total_len = sum(map(len, chunks))

338

return total_len, chunks

339

340

def to_bytes(self):

341

"""Encode the information into a byte stream."""

342

total_len, chunks = self.to_chunks()

319

343

return ''.join(chunks)

320

344

321

345

def _dump(self, include_text=False):

679

703

z_header_bytes = zlib.compress(header_bytes)

680

704

del header_bytes

681

705

z_header_bytes_len = len(z_header_bytes)

682

block_bytes = self._block.to_bytes()

706

block_bytes_len, block_chunks = self._block.to_chunks()

683

707

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

684

len(block_bytes)))

708

block_bytes_len))

685

709

lines.append(z_header_bytes)

686

lines.append(block_bytes)

687

del z_header_bytes, block_bytes

710

lines.extend(block_chunks)

711

del z_header_bytes, block_chunks

712

# TODO: This is a point where we will double the memory consumption. To

713

# avoid this, we probably have to switch to a 'chunked' api

688

714

return ''.join(lines)

689

715

690

716

@classmethod

691

717

def from_bytes(cls, bytes):

692

718

# TODO: This does extra string copying, probably better to do it a

693

# different way

719

# different way. At a minimum this creates 2 copies of the

720

# compressed content

694

721

(storage_kind, z_header_len, header_len,

695

722

block_len, rest) = bytes.split('\n', 4)

696

723

del bytes

854

881

855

882

After calling this, the compressor should no longer be used

856

883

"""

857

# TODO: this causes us to 'bloat' to 2x the size of content in the

858

# group. This has an impact for 'commit' of large objects.

859

# One possibility is to use self._content_chunks, and be lazy and

860

# only fill out self._content as a full string when we actually

861

# need it. That would at least drop the peak memory consumption

862

# for 'commit' down to ~1x the size of the largest file, at a

863

# cost of increased complexity within this code. 2x is still <<

864

# 3x the size of the largest file, so we are doing ok.

865

884

self._block.set_chunked_content(self.chunks, self.endpoint)

866

885

self.chunks = None

867

886

self._delta_index = None

1630

1649

self._unadded_refs = {}

1631

1650

keys_to_add = []

1632

1651

def flush():

1633

bytes = self._compressor.flush().to_bytes()

1652

bytes_len, chunks = self._compressor.flush().to_chunks()

1634

1653

self._compressor = GroupCompressor()

1654

# Note: At this point we still have 1 copy of the fulltext (in

1655

# record and the var 'bytes'), and this generates 2 copies of

1656

# the compressed text (one for bytes, one in chunks)

1657

# TODO: Push 'chunks' down into the _access api, so that we don't

1658

# have to double compressed memory here

1659

# TODO: Figure out how to indicate that we would be happy to free

1660

# the fulltext content at this point. Note that sometimes we

1661

# will want it later (streaming CHK pages), but most of the

1662

# time we won't (everything else)

1663

bytes = ''.join(chunks)

1664

del chunks

1635

1665

index, start, length = self._access.add_raw_records(

1636

1666

[(None, len(bytes))], bytes)[0]

1637

1667

nodes = []

1809

1839

return result

1810

1840

1811

1841

1842

class _GCBuildDetails(object):

1843

"""A blob of data about the build details.

1844

1845

This stores the minimal data, which then allows compatibility with the old

1846

api, without taking as much memory.

1847

"""

1848

1849

__slots__ = ('_index', '_group_start', '_group_end', '_basis_end',

1850

'_delta_end', '_parents')

1851

1852

method = 'group'

1853

compression_parent = None

1854

1855

def __init__(self, parents, position_info):

1856

self._parents = parents

1857

(self._index, self._group_start, self._group_end, self._basis_end,

1858

self._delta_end) = position_info

1859

1860

def __repr__(self):

1861

return '%s(%s, %s)' % (self.__class__.__name__,

1862

self.index_memo, self._parents)

1863

1864

@property

1865

def index_memo(self):

1866

return (self._index, self._group_start, self._group_end,

1867

self._basis_end, self._delta_end)

1868

1869

@property

1870

def record_details(self):

1871

return static_tuple.StaticTuple(self.method, None)

1872

1873

def __getitem__(self, offset):

1874

"""Compatibility thunk to act like a tuple."""

1875

if offset == 0:

1876

return self.index_memo

1877

elif offset == 1:

1878

return self.compression_parent # Always None

1879

elif offset == 2:

1880

return self._parents

1881

elif offset == 3:

1882

return self.record_details

1883

else:

1884

raise IndexError('offset out of range')

1885

1886

def __len__(self):

1887

return 4

1888

1889

1812

1890

class _GCGraphIndex(object):

1813

1891

"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

1814

1892

2009

2087

parents = None

2010

2088

else:

2011

2089

parents = entry[3][0]

2012

method = 'group'

2013

result[key] = (self._node_to_position(entry),

2014

None, parents, (method, None))

2090

details = _GCBuildDetails(parents, self._node_to_position(entry))

2091

result[key] = details

2015

2092

return result

2016

2093

2017

2094

def keys(self):

2033

2110

# each, or about 7MB. Note that it might be even more when you consider

2034

2111

# how PyInt is allocated in separate slabs. And you can't return a slab

2035

2112

# to the OS if even 1 int on it is in use. Note though that Python uses

2036

# a LIFO when re-using PyInt slots, which probably causes more

2113

# a LIFO when re-using PyInt slots, which might cause more

2037

2114

# fragmentation.

2038

2115

start = int(bits[0])

2039

2116

start = self._int_cache.setdefault(start, start)

Older »