~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Jelmer Vernooij
Date: 2010-12-20 11:57:14 UTC
mto: This revision was merged to the branch mainline in revision 5577.
Revision ID: jelmer@samba.org-20101220115714-2ru3hfappjweeg7q

Don't use no-plugins.

files added:
NEWS

bzrlib/cmd_test_script.py

bzrlib/pyutils.py

bzrlib/tests/blackbox/test_config.py

bzrlib/tests/blackbox/test_resolve.py

bzrlib/tests/blackbox/test_script.py

bzrlib/tests/per_bzrdir

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_tree/test_is_executable.py

bzrlib/tests/scenarios.py

bzrlib/tests/test_pyutils.py

bzrlib/tests/test_scenarios.py

bzrlib/tests/testui.py

doc/developers/documenting-changes.txt

doc/developers/ui.txt

doc/developers/xdg_config_spec.txt

doc/en/release-notes/bzr-0.1.txt

doc/en/release-notes/bzr-0.10.txt

doc/en/release-notes/bzr-0.11.txt

doc/en/release-notes/bzr-0.12.txt

doc/en/release-notes/bzr-0.13.txt

doc/en/release-notes/bzr-0.14.txt

doc/en/release-notes/bzr-0.15.txt

doc/en/release-notes/bzr-0.16.txt

doc/en/release-notes/bzr-0.17.txt

doc/en/release-notes/bzr-0.18.txt

doc/en/release-notes/bzr-0.6.txt

doc/en/release-notes/bzr-0.7.txt

doc/en/release-notes/bzr-0.8.txt

doc/en/release-notes/bzr-0.9.txt

doc/en/release-notes/bzr-0.90.txt

doc/en/release-notes/bzr-0.91.txt

doc/en/release-notes/bzr-0.92.txt

doc/en/release-notes/bzr-1.0.txt

doc/en/release-notes/bzr-1.1.txt

doc/en/release-notes/bzr-1.10.txt

doc/en/release-notes/bzr-1.11.txt

doc/en/release-notes/bzr-1.12.txt

doc/en/release-notes/bzr-1.13.txt

doc/en/release-notes/bzr-1.14.txt

doc/en/release-notes/bzr-1.15.txt

doc/en/release-notes/bzr-1.16.txt

doc/en/release-notes/bzr-1.17.txt

doc/en/release-notes/bzr-1.18.txt

doc/en/release-notes/bzr-1.2.txt

doc/en/release-notes/bzr-1.3.txt

doc/en/release-notes/bzr-1.4.txt

doc/en/release-notes/bzr-1.5.txt

doc/en/release-notes/bzr-1.6.txt

doc/en/release-notes/bzr-1.7.txt

doc/en/release-notes/bzr-1.8.txt

doc/en/release-notes/bzr-1.9.txt

doc/en/release-notes/bzr-2.0.txt

doc/en/release-notes/bzr-2.1.txt

doc/en/release-notes/bzr-2.2.txt

doc/en/release-notes/series-template.txt

tools/fixed-in.py

files renamed:
NEWS => doc/en/release-notes/bzr-2.3.txt

NEWS-template.txt => doc/en/release-notes/release-template.txt

files modified:
.bzrignore

.testr.conf

Makefile

README

bzrlib/__init__.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bzrdir.py

bzrlib/chk_map.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/controldir.py

bzrlib/crash.py

bzrlib/delta.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/export/__init__.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/conflict-types.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/hooks.py

bzrlib/ignores.py

bzrlib/inventory.py

bzrlib/lazy_import.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_api.py

bzrlib/plugins/launchpad/lp_directory.py

bzrlib/plugins/launchpad/lp_propose.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rules.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/bzrdir.py

bzrlib/smart/repository.py

bzrlib/status.py

bzrlib/strace.py

bzrlib/tag.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_filtered_view_ops.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_view.py

bzrlib/tests/doc_generate/__init__.py

bzrlib/tests/doc_generate/builders/test_texinfo.py

bzrlib/tests/doc_generate/writers/test_texinfo.py

bzrlib/tests/features.py

bzrlib/tests/https_server.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_parent.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_controldir/test_controldir.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_uifactory/__init__.py

bzrlib/tests/per_versionedfile.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/script.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__bencode.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__groupcompress.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bugtracker.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_delta.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_https_ca_bundle.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_script.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_server.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_strace.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_test_server.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_upgrade_stacked.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/ssh.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/urlutils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml_serializer.py

doc/developers/HACKING.txt

doc/developers/authentication-ring.txt

doc/developers/bug-handling.txt

doc/developers/check.txt

doc/developers/code-review.txt

doc/developers/code-style.txt

doc/developers/content-filtering.txt

doc/developers/contribution-quickstart.txt

doc/developers/cycle.txt

doc/developers/directory-fingerprints.txt

doc/developers/ec2.txt

doc/developers/groupcompress-design.txt

doc/developers/incremental-push-pull.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/initial-push-pull.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/network-protocol.txt

doc/developers/packrepo.txt

doc/developers/performance-roadmap-rationale.txt

doc/developers/performance-use-case-analysis.txt

doc/developers/planned-change-integration.txt

doc/developers/planned-performance-changes.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/repository.txt

doc/developers/revert.txt

doc/developers/testing.txt

doc/developers/tortoise-strategy.txt

doc/developers/transports.txt

doc/developers/win32_build_setup.txt

doc/en/_templates/index.html

doc/en/admin-guide/other-setups.txt

doc/en/admin-guide/simple-setups.txt

doc/en/conf.py

doc/en/mini-tutorial/index.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide/data_migration.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/hooks.txt

doc/en/user-guide/http_smart_server.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/sending_changes.txt

doc/en/user-guide/server.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/shared_repository_layouts.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/version_info.txt

doc/en/user-guide/writing_a_plugin.txt

doc/en/whats-new/whats-new-in-2.1.txt

doc/en/whats-new/whats-new-in-2.2.txt

doc/en/whats-new/whats-new-in-2.3.txt

doc/es/mini-tutorial/index.txt

doc/ja/user-guide/http_smart_server.txt

doc/ja/user-reference/index.txt

doc/ru/tutorials/tutorial.txt

doc/ru/user-guide/branching_a_project.txt

setup.py

tools/check-newsbugs.py

tools/generate_release_notes.py

tools/win32/bazaar.url

tools/win32/build_release.py

tools/win32/buildout.cfg

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/start_bzr.bat

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

101

def __init__(self):

102

# map by key? or just order in file?

103

self._compressor_name = None

104

self._z_content = None

104

self._z_content_chunks = None

105

self._z_content_decompressor = None

106

self._z_content_length = None

107

self._content_length = None

135

self._content = ''.join(self._content_chunks)

136

self._content_chunks = None

137

if self._content is None:

138

if self._z_content is None:

138

# We join self._z_content_chunks here, because if we are

139

# decompressing, then it is *very* likely that we have a single

140

# chunk

141

if self._z_content_chunks is None:

139

142

raise AssertionError('No content to decompress')

140

if self._z_content == '':

143

z_content = ''.join(self._z_content_chunks)

144

if z_content == '':

141

145

self._content = ''

142

146

elif self._compressor_name == 'lzma':

143

147

# We don't do partial lzma decomp yet

144

self._content = pylzma.decompress(self._z_content)

148

self._content = pylzma.decompress(z_content)

145

149

elif self._compressor_name == 'zlib':

146

150

# Start a zlib decompressor

147

151

if num_bytes * 4 > self._content_length * 3:

148

152

# If we are requesting more than 3/4ths of the content,

149

153

# just extract the whole thing in a single pass

150

154

num_bytes = self._content_length

151

self._content = zlib.decompress(self._z_content)

155

self._content = zlib.decompress(z_content)

152

156

else:

153

157

self._z_content_decompressor = zlib.decompressobj()

154

158

# Seed the decompressor with the uncompressed bytes, so

155

159

# that the rest of the code is simplified

156

160

self._content = self._z_content_decompressor.decompress(

157

self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

161

z_content, num_bytes + _ZLIB_DECOMP_WINDOW)

158

162

if not self._z_content_decompressor.unconsumed_tail:

159

163

self._z_content_decompressor = None

160

164

else:

207

211

# XXX: Define some GCCorrupt error ?

208

212

raise AssertionError('Invalid bytes: (%d) != %d + %d' %

209

213

(len(bytes), pos, self._z_content_length))

210

self._z_content = bytes[pos:]

214

self._z_content_chunks = (bytes[pos:],)

215

216

@property

217

def _z_content(self):

218

"""Return z_content_chunks as a simple string.

219

220

Meant only to be used by the test suite.

221

"""

222

if self._z_content_chunks is not None:

223

return ''.join(self._z_content_chunks)

224

return None

211

225

212

226

@classmethod

213

227

def from_bytes(cls, bytes):

269

283

self._content_length = length

270

284

self._content_chunks = content_chunks

271

285

self._content = None

272

self._z_content = None

286

self._z_content_chunks = None

273

287

274

288

def set_content(self, content):

275

289

"""Set the content of this block."""

276

290

self._content_length = len(content)

277

291

self._content = content

278

self._z_content = None

292

self._z_content_chunks = None

279

293

280

294

def _create_z_content_using_lzma(self):

281

295

if self._content_chunks is not None:

283

297

self._content_chunks = None

284

298

if self._content is None:

285

299

raise AssertionError('Nothing to compress')

286

self._z_content = pylzma.compress(self._content)

287

self._z_content_length = len(self._z_content)

300

z_content = pylzma.compress(self._content)

301

self._z_content_chunks = (z_content,)

302

self._z_content_length = len(z_content)

288

303

289

def _create_z_content_from_chunks(self):

304

def _create_z_content_from_chunks(self, chunks):

290

305

compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)

291

compressed_chunks = map(compressor.compress, self._content_chunks)

306

# Peak at this point is 1 fulltext, 1 compressed text, + zlib overhead

307

# (measured peak is maybe 30MB over the above...)

308

compressed_chunks = map(compressor.compress, chunks)

292

309

compressed_chunks.append(compressor.flush())

293

self._z_content = ''.join(compressed_chunks)

294

self._z_content_length = len(self._z_content)

310

# Ignore empty chunks

311

self._z_content_chunks = [c for c in compressed_chunks if c]

312

self._z_content_length = sum(map(len, self._z_content_chunks))

295

313

296

314

def _create_z_content(self):

297

if self._z_content is not None:

315

if self._z_content_chunks is not None:

298

316

return

299

317

if _USE_LZMA:

300

318

self._create_z_content_using_lzma()

301

319

return

302

320

if self._content_chunks is not None:

303

self._create_z_content_from_chunks()

304

return

305

self._z_content = zlib.compress(self._content)

306

self._z_content_length = len(self._z_content)

321

chunks = self._content_chunks

322

else:

323

chunks = (self._content,)

324

self._create_z_content_from_chunks(chunks)

307

325

308

def to_bytes(self):

309

"""Encode the information into a byte stream."""

326

def to_chunks(self):

327

"""Create the byte stream as a series of 'chunks'"""

310

328

self._create_z_content()

311

329

if _USE_LZMA:

312

330

header = self.GCB_LZ_HEADER

313

331

else:

314

332

header = self.GCB_HEADER

315

chunks = [header,

316

'%d\n%d\n' % (self._z_content_length, self._content_length),

317

self._z_content,

333

chunks = ['%s%d\n%d\n'

334

% (header, self._z_content_length, self._content_length),

318

335

]

336

chunks.extend(self._z_content_chunks)

337

total_len = sum(map(len, chunks))

338

return total_len, chunks

339

340

def to_bytes(self):

341

"""Encode the information into a byte stream."""

342

total_len, chunks = self.to_chunks()

319

343

return ''.join(chunks)

320

344

321

345

def _dump(self, include_text=False):

679

703

z_header_bytes = zlib.compress(header_bytes)

680

704

del header_bytes

681

705

z_header_bytes_len = len(z_header_bytes)

682

block_bytes = self._block.to_bytes()

706

block_bytes_len, block_chunks = self._block.to_chunks()

683

707

lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,

684

len(block_bytes)))

708

block_bytes_len))

685

709

lines.append(z_header_bytes)

686

lines.append(block_bytes)

687

del z_header_bytes, block_bytes

710

lines.extend(block_chunks)

711

del z_header_bytes, block_chunks

712

# TODO: This is a point where we will double the memory consumption. To

713

# avoid this, we probably have to switch to a 'chunked' api

688

714

return ''.join(lines)

689

715

690

716

@classmethod

691

717

def from_bytes(cls, bytes):

692

718

# TODO: This does extra string copying, probably better to do it a

693

# different way

719

# different way. At a minimum this creates 2 copies of the

720

# compressed content

694

721

(storage_kind, z_header_len, header_len,

695

722

block_len, rest) = bytes.split('\n', 4)

696

723

del bytes

854

881

855

882

After calling this, the compressor should no longer be used

856

883

"""

857

# TODO: this causes us to 'bloat' to 2x the size of content in the

858

# group. This has an impact for 'commit' of large objects.

859

# One possibility is to use self._content_chunks, and be lazy and

860

# only fill out self._content as a full string when we actually

861

# need it. That would at least drop the peak memory consumption

862

# for 'commit' down to ~1x the size of the largest file, at a

863

# cost of increased complexity within this code. 2x is still <<

864

# 3x the size of the largest file, so we are doing ok.

865

884

self._block.set_chunked_content(self.chunks, self.endpoint)

866

885

self.chunks = None

867

886

self._delta_index = None

1630

1649

self._unadded_refs = {}

1631

1650

keys_to_add = []

1632

1651

def flush():

1633

bytes = self._compressor.flush().to_bytes()

1652

bytes_len, chunks = self._compressor.flush().to_chunks()

1634

1653

self._compressor = GroupCompressor()

1654

# Note: At this point we still have 1 copy of the fulltext (in

1655

# record and the var 'bytes'), and this generates 2 copies of

1656

# the compressed text (one for bytes, one in chunks)

1657

# TODO: Push 'chunks' down into the _access api, so that we don't

1658

# have to double compressed memory here

1659

# TODO: Figure out how to indicate that we would be happy to free

1660

# the fulltext content at this point. Note that sometimes we

1661

# will want it later (streaming CHK pages), but most of the

1662

# time we won't (everything else)

1663

bytes = ''.join(chunks)

1664

del chunks

1635

1665

index, start, length = self._access.add_raw_records(

1636

1666

[(None, len(bytes))], bytes)[0]

1637

1667

nodes = []

Older »