~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/groupcompress.py

Committer: Aaron Bentley
Date: 2009-06-26 03:44:30 UTC
mfrom: (4481 +trunk)
mto: This revision was merged to the branch mainline in revision 4482.
Revision ID: aaron@aaronbentley.com-20090626034430-5btbqa44ikywccsu

Merge bzr.dev into vpipe

files renamed:
bzrlib/_btree_serializer_c.pyx => bzrlib/_btree_serializer_pyx.pyx

bzrlib/_dirstate_helpers_c.h => bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_c.pyx => bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_knit_load_data_c.pyx => bzrlib/_knit_load_data_pyx.pyx

files modified:
.bzrignore

NEWS

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/benchmarks/bench_dirstate.py

bzrlib/branch.py

bzrlib/btree_index.py

bzrlib/builtins.py

bzrlib/chk_map.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/groupcompress.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/shelf_ui.py

bzrlib/switch.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/bzrdir_implementations/test_push.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_versionedfile.py

bzrlib/transport/local.py

bzrlib/tuned_gzip.py

bzrlib/ui/text.py

bzrlib/util/bencode.py

bzrlib/versionedfile.py

bzrlib/workingtree_4.py

setup.py

tools/time_graph.py

Show diffs side-by-side

added added

removed removed

bzrlib/groupcompress.py

108

self._z_content_length = None

109

self._content_length = None

110

self._content = None

111

self._content_chunks = None

111

112

113

def __len__(self):

113

114

# This is the maximum number of bytes this object will reference if

137

138

% (num_bytes, self._content_length))

138

139

# Expand the content if required

139

140

if self._content is None:

141

if self._content_chunks is not None:

142

self._content = ''.join(self._content_chunks)

143

self._content_chunks = None

144

if self._content is None:

140

145

if self._z_content is None:

141

146

raise AssertionError('No content to decompress')

142

147

if self._z_content == '':

273

278

bytes = apply_delta_to_source(self._content, content_start, end)

274

279

return bytes

275

280

281

def set_chunked_content(self, content_chunks, length):

282

"""Set the content of this block to the given chunks."""

283

# If we have lots of short lines, it may be more efficient to join

284

# the content ahead of time. If the content is <10MiB, we don't really

285

# care about the extra memory consumption, so we can just pack it and

286

# be done. However, timing showed 18s => 17.9s for repacking 1k revs of

287

# mysql, which is below the noise margin

288

self._content_length = length

289

self._content_chunks = content_chunks

290

self._content = None

291

self._z_content = None

292

276

293

def set_content(self, content):

277

294

"""Set the content of this block."""

278

295

self._content_length = len(content)

279

296

self._content = content

280

297

self._z_content = None

281

298

299

def _create_z_content_using_lzma(self):

300

if self._content_chunks is not None:

301

self._content = ''.join(self._content_chunks)

302

self._content_chunks = None

303

if self._content is None:

304

raise AssertionError('Nothing to compress')

305

self._z_content = pylzma.compress(self._content)

306

self._z_content_length = len(self._z_content)

307

308

def _create_z_content_from_chunks(self):

309

compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)

310

compressed_chunks = map(compressor.compress, self._content_chunks)

311

compressed_chunks.append(compressor.flush())

312

self._z_content = ''.join(compressed_chunks)

313

self._z_content_length = len(self._z_content)

314

315

def _create_z_content(self):

316

if self._z_content is not None:

317

return

318

if _USE_LZMA:

319

self._create_z_content_using_lzma()

320

return

321

if self._content_chunks is not None:

322

self._create_z_content_from_chunks()

323

return

324

self._z_content = zlib.compress(self._content)

325

self._z_content_length = len(self._z_content)

326

282

327

def to_bytes(self):

283

328

"""Encode the information into a byte stream."""

284

compress = zlib.compress

285

if _USE_LZMA:

286

compress = pylzma.compress

287

if self._z_content is None:

288

if self._content is None:

289

raise AssertionError('Nothing to compress')

290

self._z_content = compress(self._content)

291

self._z_content_length = len(self._z_content)

329

self._create_z_content()

292

330

if _USE_LZMA:

293

331

header = self.GCB_LZ_HEADER

294

332

else:

762

800

# for 'commit' down to ~1x the size of the largest file, at a

763

801

# cost of increased complexity within this code. 2x is still <<

764

802

# 3x the size of the largest file, so we are doing ok.

765

content = ''.join(self.chunks)

803

self._block.set_chunked_content(self.chunks, self.endpoint)

766

804

self.chunks = None

767

805

self._delta_index = None

768

self._block.set_content(content)

769

806

return self._block

770

807

771

808

def pop_last(self):

1008

1045

nostore_sha=nostore_sha))[0]

1009

1046

return sha1, length, None

1010

1047

1048

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):

1049

"""See VersionedFiles._add_text()."""

1050

self._index._check_write_ok()

1051

self._check_add(key, None, random_id, check_content=False)

1052

if text.__class__ is not str:

1053

raise errors.BzrBadParameterUnicode("text")

1054

if parents is None:

1055

# The caller might pass None if there is no graph data, but kndx

1056

# indexes can't directly store that, so we give them

1057

# an empty tuple instead.

1058

parents = ()

1059

# double handling for now. Make it work until then.

1060

length = len(text)

1061

record = FulltextContentFactory(key, parents, None, text)

1062

sha1 = list(self._insert_record_stream([record], random_id=random_id,

1063

nostore_sha=nostore_sha))[0]

1064

return sha1, length, None

1065

1011

1066

def add_fallback_versioned_files(self, a_versioned_files):

1012

1067

"""Add a source of texts for texts not present in this knit.

1013

1068

1521

1576

1522

1577

:return: An iterator over (line, key).

1523

1578

"""

1524

if pb is None:

1525

pb = progress.DummyProgress()

1526

1579

keys = set(keys)

1527

1580

total = len(keys)

1528

1581

# we don't care about inclusions, the caller cares.

1532

1585

'unordered', True)):

1533

1586

# XXX: todo - optimise to use less than full texts.

1534

1587

key = record.key

1535

pb.update('Walking content', key_idx, total)

1588

if pb is not None:

1589

pb.update('Walking content', key_idx, total)

1536

1590

if record.storage_kind == 'absent':

1537

1591

raise errors.RevisionNotPresent(key, self)

1538

1592

lines = osutils.split_lines(record.get_bytes_as('fulltext'))

1539

1593

for line in lines:

1540

1594

yield line, key

1541

pb.update('Walking content', total, total)

1595

if pb is not None:

1596

pb.update('Walking content', total, total)

1542

1597

1543

1598

def keys(self):

1544

1599

"""See VersionedFiles.keys."""

1605

1660

if refs:

1606

1661

for ref in refs:

1607

1662

if ref:

1608

raise KnitCorrupt(self,

1663

raise errors.KnitCorrupt(self,

1609

1664

"attempt to add node with parents "

1610

1665

"in parentless index.")

1611

1666

refs = ()

1668

1723

if check_present:

1669

1724

missing_keys = keys.difference(found_keys)

1670

1725

if missing_keys:

1671

raise RevisionNotPresent(missing_keys.pop(), self)

1726

raise errors.RevisionNotPresent(missing_keys.pop(), self)

1672

1727

1673

1728

def get_parent_map(self, keys):

1674

1729

"""Get a map of the parents of keys.

Older »