~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test_groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-10-17 04:43:14 UTC
  • mto: This revision was merged to the branch mainline in revision 4756.
  • Revision ID: john@arbash-meinel.com-20091017044314-nlvrrqnz0f2wzcp4
Change the GroupCompressBlock code a bit.
If the first decompress request is big enough, just decompress everything,
and when we do that, let go of the decompressobj.
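
In rough terms, the new logic looks like the following minimal sketch. This
is an illustration, not bzrlib's actual implementation; the attribute names
mirror the ones the test below exercises, and the incremental branch is
simplified:

    import zlib

    class Block(object):
        """Hypothetical sketch of the revised decompression logic."""

        def __init__(self, z_content, content_length):
            self._z_content = z_content            # raw zlib stream
            self._content_length = content_length  # known decompressed size
            self._content = None
            self._z_content_decompressor = None

        def _ensure_content(self, num_bytes=None):
            if num_bytes is None:
                num_bytes = self._content_length
            if self._content is not None and len(self._content) >= num_bytes:
                return  # we already have enough decompressed
            if num_bytes >= self._content_length:
                # The request is big enough: decompress everything in one
                # shot and hold no decompressobj, so its ~260kB of internal
                # state is never kept alive.
                self._content = zlib.decompress(self._z_content)
                self._z_content_decompressor = None
                return
            # Partial request: decompress incrementally, keeping the stream
            # around in case a later call asks for more.
            if self._z_content_decompressor is None:
                self._z_content_decompressor = zlib.decompressobj()
                self._content = self._z_content_decompressor.decompress(
                    self._z_content, num_bytes)
            else:
                d = self._z_content_decompressor
                self._content += d.decompress(
                    d.unconsumed_tail, num_bytes - len(self._content))
            if len(self._content) >= self._content_length:
                # Fully consumed: release the decompressobj and its buffers.
                self._z_content_decompressor = None

The key path is the whole-buffer one: once zlib.decompress() has produced
the full content there is no streaming state left to keep, so the per-stream
buffers are freed immediately.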

After digging through the zlib code, it looks like a single zlib stream
object carries about 5kB of internal state plus four 64kB working buffers,
so roughly 260kB of state in total (5kB + 4*64kB = 261kB). That adds up
quickly when many blocks each hold a live decompressor.


When branching a copy of 'bzr.dev' locally, peak memory was 383MB with
stock bzr.dev and 345MB with only this patch applied: a 38MB saving, or
roughly 11% of peak (which would correspond to on the order of 150 live
decompressobj instances at ~260kB each).

Also, this was 'unreferenced' memory: it hides inside zlib's internal
state and working buffers, so it wasn't memory that Meliae could even
find. \o/
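
As a small illustration (not part of the patch) of why that memory is
invisible to heap profilers: the Python-level decompressobj wrapper is
tiny, while the C-allocated z_stream window and buffers it owns are not
Python objects at all, so neither sys.getsizeof nor an object-graph walker
like Meliae will attribute them:

    import sys
    import zlib

    d = zlib.decompressobj()
    # Reports only the Python wrapper (a few dozen bytes); the ~260kB of
    # C-allocated zlib state behind it is invisible to getsizeof and to
    # object-graph tools such as Meliae.
    print sys.getsizeof(d)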

--- bzrlib/tests/test_groupcompress.py
+++ bzrlib/tests/test_groupcompress.py
@@ -418,8 +418,12 @@
         # And the decompressor is finalized
         self.assertIs(None, block._z_content_decompressor)
 
-    def test_partial_decomp_no_known_length(self):
+    def test__ensure_all_content(self):
         content_chunks = []
+        # We need a sufficient amount of data so that zlib.decompress has
+        # partial decompression to work with. Most auto-generated data
+        # compresses a bit too well, we want a combination, so we combine a sha
+        # hash with compressible data.
         for i in xrange(2048):
             next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
             content_chunks.append(next_content)
@@ -433,30 +437,13 @@
         block._z_content = z_content
         block._z_content_length = len(z_content)
         block._compressor_name = 'zlib'
-        block._content_length = None # Don't tell the decompressed length
+        block._content_length = 158634
         self.assertIs(None, block._content)
-        block._ensure_content(100)
-        self.assertIsNot(None, block._content)
-        # We have decompressed at least 100 bytes
-        self.assertTrue(len(block._content) >= 100)
-        # We have not decompressed the whole content
-        self.assertTrue(len(block._content) < 158634)
-        self.assertEqualDiff(content[:len(block._content)], block._content)
-        # ensuring content that we already have shouldn't cause any more data
-        # to be extracted
-        cur_len = len(block._content)
-        block._ensure_content(cur_len - 10)
-        self.assertEqual(cur_len, len(block._content))
-        # Now we want a bit more content
-        cur_len += 10
-        block._ensure_content(cur_len)
-        self.assertTrue(len(block._content) >= cur_len)
-        self.assertTrue(len(block._content) < 158634)
-        self.assertEqualDiff(content[:len(block._content)], block._content)
-        # And now lets finish
-        block._ensure_content()
+        # The first _ensure_content got all of the required data
+        block._ensure_content(158634)
         self.assertEqualDiff(content, block._content)
-        # And the decompressor is finalized
+        # And we should have released the _z_content_decompressor since it was
+        # fully consumed
         self.assertIs(None, block._z_content_decompressor)
 
     def test__dump(self):