        :param num_bytes: Ensure that we have extracted at least num_bytes of
            content. If None, consume everything.
        """
        # TODO: If we re-use the same content block at different times during
        #       get_record_stream(), it is possible that the first pass will
        #       get inserted, triggering an extract/_ensure_content() which
        #       will get rid of _z_content. And then the next use of the block
        #       will try to access _z_content (to send it over the wire), and
        #       fail because it is already extracted. Consider never releasing
        #       _z_content because of this.
        if self._content_length is None:
            raise AssertionError('self._content_length should never be None')
        if num_bytes is None:
            num_bytes = self._content_length
        elif (self._content_length is not None
              and num_bytes > self._content_length):
            raise AssertionError(
                'requested num_bytes (%d) > content length (%d)'
                % (num_bytes, self._content_length))
        # Expand the content if required
        if self._content is None:
            if self._z_content is None:
                raise AssertionError('No content to decompress')
            if self._z_content == '':
                self._content = ''
            elif self._compressor_name == 'lzma':
                # We don't do partial lzma decomp yet
                self._content = pylzma.decompress(self._z_content)
            elif self._compressor_name == 'zlib':
                # Start a zlib decompressor
                if num_bytes * 4 > self._content_length * 3:
                    # If we are requesting more than 3/4ths of the content,
                    # just extract the whole thing in a single pass
                    num_bytes = self._content_length
                    self._content = zlib.decompress(self._z_content)
                else:
                    self._z_content_decompressor = zlib.decompressobj()
                    # Seed the content with enough bytes to satisfy num_bytes
                    self._content = self._z_content_decompressor.decompress(
                        self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
        # ...
        # Any bytes remaining to be decompressed will be in the decompressors
        # 'unconsumed_tail'

        # Do we have enough bytes already?
        if len(self._content) >= num_bytes:
            return
        # If we got this far, and don't have a decompressor, something is wrong
        if self._z_content_decompressor is None:
            raise AssertionError(
                'No decompressor to decompress %d bytes' % num_bytes)
        remaining_decomp = self._z_content_decompressor.unconsumed_tail
        if not remaining_decomp:
            raise AssertionError('Nothing left to decompress')
        needed_bytes = num_bytes - len(self._content)
        # We always set max_size to 32kB over the minimum needed, so that
        # zlib will give us as much as we really want.
        # TODO: If this isn't good enough, we could make a loop here,
        #       that keeps expanding the request until we get enough
        self._content += self._z_content_decompressor.decompress(
            remaining_decomp, needed_bytes + _ZLIB_DECOMP_WINDOW)
        if len(self._content) < num_bytes:
            raise AssertionError('%d bytes wanted, only %d available'
                                 % (num_bytes, len(self._content)))
        if not self._z_content_decompressor.unconsumed_tail:
            # The stream is finished
            self._z_content_decompressor = None
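
    # Illustration, not part of the original file: a minimal standalone
    # sketch of the zlib mechanism _ensure_content relies on.
    # zlib.decompressobj().decompress(data, max_length) inflates at most
    # max_length bytes and parks any unread compressed input in
    # 'unconsumed_tail', which can be fed back in later. The method name and
    # the 'window' head-room parameter are hypothetical stand-ins for the
    # module's _ZLIB_DECOMP_WINDOW pattern.
    @staticmethod
    def _demo_partial_zlib_decompress(z_bytes, num_bytes, window=32 * 1024):
        """Inflate at least num_bytes of z_bytes, leaving the rest pending."""
        decomp = zlib.decompressobj()
        # Ask for a little more than needed so zlib rounds up generously,
        # mirroring the 'num_bytes + _ZLIB_DECOMP_WINDOW' calls above.
        content = decomp.decompress(z_bytes, num_bytes + window)
        while len(content) < num_bytes and decomp.unconsumed_tail:
            # The retry loop the TODO above contemplates: feed the unconsumed
            # tail back in until we have the bytes we were asked for.
            content += decomp.decompress(decomp.unconsumed_tail,
                                         num_bytes - len(content) + window)
        return content, decomp.unconsumed_tail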

    def _parse_bytes(self, bytes, pos):
        """Read the various lengths from the header.

    # ...

        return self.get_record_stream(keys, 'unordered', True)

    def clear_cache(self):
        """See VersionedFiles.clear_cache()"""
        self._group_cache.clear()
        self._index._graph_index.clear_cache()
        self._index._int_cache.clear()

    def _check_add(self, key, lines, random_id, check_content):
        """check that version_id and lines are safe to add."""
        version_id = key[-1]

    # ...

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        bits = node[2].split(' ')
        # It would be nice not to read the entire gzip.
        # start and stop are put into _int_cache because they are very common.
        # They define the 'group' that an entry is in, and many groups can have
        # thousands of objects.
        # Branching Launchpad, for example, saves ~600k integers, at 12 bytes
        # each, or about 7MB. Note that it might be even more when you consider
        # how PyInt is allocated in separate slabs. And you can't return a slab
        # to the OS if even 1 int on it is in use. Note though that Python uses
        # a LIFO when re-using PyInt slots, which probably causes more
        # fragmentation.
        start = int(bits[0])
        start = self._int_cache.setdefault(start, start)
        stop = int(bits[1])
        stop = self._int_cache.setdefault(stop, stop)
        basis_end = int(bits[2])
        delta_end = int(bits[3])
        # We can't use StaticTuple here, because node[0] is a BTreeGraphIndex
        return (node[0], start, stop, basis_end, delta_end)
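
    # Illustration, not part of the original file: the setdefault() calls in
    # _node_to_position are a manual interning pattern. The first time a
    # start/stop value is seen it is stored in the cache; every later
    # occurrence gets back that same object, so the thousands of entries that
    # share a group also share one pair of ints instead of holding private
    # copies. A standalone sketch, with a hypothetical method name and a
    # plain dict standing in for _int_cache:
    @staticmethod
    def _demo_intern_positions(value_strs):
        """Parse 'start stop basis_end delta_end' strings, interning ints."""
        int_cache = {}
        positions = []
        for value in value_strs:
            bits = value.split(' ')
            start, stop = int(bits[0]), int(bits[1])
            # setdefault stores the int on first sight and hands back the
            # canonical object on every later lookup.
            start = int_cache.setdefault(start, start)
            stop = int_cache.setdefault(stop, stop)
            positions.append((start, stop, int(bits[2]), int(bits[3])))
        return positions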

    def scan_unvalidated_index(self, graph_index):
        """Inform this _GCGraphIndex that there is an unvalidated index.