~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/groupcompress.py

  • Committer: Robert J. Tanner
  • Date: 2009-06-10 03:56:49 UTC
  • mfrom: (4423 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4425.
  • Revision ID: tanner@real-time.com-20090610035649-7rfx4cls4550zc3c
Merge 1.15.1 back to trunk

=== modified file 'bzrlib/groupcompress.py'
--- bzrlib/groupcompress.py
+++ bzrlib/groupcompress.py
@@ -31,13 +31,13 @@
     diff,
     errors,
     graph as _mod_graph,
+    knit,
     osutils,
     pack,
     patiencediff,
     trace,
     )
 from bzrlib.graph import Graph
-from bzrlib.knit import _DirectPackAccess
 from bzrlib.btree_index import BTreeBuilder
 from bzrlib.lru_cache import LRUSizeCache
 from bzrlib.tsort import topo_sort
@@ -324,7 +324,11 @@
                 raise ValueError('invalid content_len %d for record @ pos %d'
                                  % (content_len, pos - len_len - 1))
             if kind == 'f': # Fulltext
-                result.append(('f', content_len))
+                if include_text:
+                    text = self._content[pos:pos+content_len]
+                    result.append(('f', content_len, text))
+                else:
+                    result.append(('f', content_len))
             elif kind == 'd': # Delta
                 delta_content = self._content[pos:pos+content_len]
                 delta_info = []
@@ -339,7 +343,11 @@
                         (offset, length,
                          delta_pos) = decode_copy_instruction(delta_content, c,
                                                               delta_pos)
-                        delta_info.append(('c', offset, length))
+                        if include_text:
+                            text = self._content[offset:offset+length]
+                            delta_info.append(('c', offset, length, text))
+                        else:
+                            delta_info.append(('c', offset, length))
                         measured_len += length
                     else: # Insert
                         if include_text:
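
The two hunks above extend the debugging dump of a group-compress block so that, when include_text is passed, each fulltext ('f', ...) record and each copy ('c', ...) instruction also carries the bytes it refers to. A minimal standalone sketch of the two record shapes (plain Python for illustration; dump_fulltext is an invented name, not bzrlib API):

    # Same include_text branching as the diff, applied to a simple buffer.
    def dump_fulltext(content, pos, content_len, include_text=False):
        if include_text:
            text = content[pos:pos + content_len]
            return ('f', content_len, text)   # record carries the bytes
        return ('f', content_len)             # old, length-only record

    print(dump_fulltext('hello world', 0, 5))        # ('f', 5)
    print(dump_fulltext('hello world', 0, 5, True))  # ('f', 5, 'hello')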
@@ -746,6 +754,14 @@
 
         After calling this, the compressor should no longer be used
         """
+        # TODO: this causes us to 'bloat' to 2x the size of content in the
+        #       group. This has an impact for 'commit' of large objects.
+        #       One possibility is to use self._content_chunks, and be lazy and
+        #       only fill out self._content as a full string when we actually
+        #       need it. That would at least drop the peak memory consumption
+        #       for 'commit' down to ~1x the size of the largest file, at a
+        #       cost of increased complexity within this code. 2x is still <<
+        #       3x the size of the largest file, so we are doing ok.
         content = ''.join(self.chunks)
         self.chunks = None
         self._delta_index = None
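
The TODO added above records a known memory cost: joining self.chunks into one string holds roughly 2x the group's content in memory at once. A rough sketch of the lazy-join idea it proposes (the class and attribute names here are assumptions for illustration, not bzrlib's eventual implementation):

    # Keep the chunk list and build the flat string only on demand, so
    # peak memory stays near 1x until a caller really needs the string.
    class LazyContent(object):
        def __init__(self, chunks):
            self._content_chunks = chunks
            self._content = None

        def _ensure_content(self):
            if self._content is None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None  # drop the duplicate copy
            return self._content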
@@ -911,7 +927,7 @@
         writer.begin()
         index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
             add_callback=graph_index.add_nodes)
-        access = _DirectPackAccess({})
+        access = knit._DirectPackAccess({})
         access.set_writer(writer, graph_index, (transport, 'newpack'))
         result = GroupCompressVersionedFiles(index, access, delta)
         result.stream = stream
@@ -1018,15 +1034,19 @@
         else:
             keys = [key]
             parent_map = {key:()}
+        # So we used Graph(self) to load the parent_map, but now that we have
+        # it, we can just query the parent map directly, so create a new Graph
+        # object
+        graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(parent_map))
         head_cache = _mod_graph.FrozenHeadsCache(graph)
         parent_cache = {}
         reannotate = annotate.reannotate
         for record in self.get_record_stream(keys, 'topological', True):
             key = record.key
-            chunks = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
+            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
             parent_lines = [parent_cache[parent] for parent in parent_map[key]]
             parent_cache[key] = list(
-                reannotate(parent_lines, chunks, key, None, head_cache))
+                reannotate(parent_lines, lines, key, None, head_cache))
         return parent_cache[key]
 
     def check(self, progress_bar=None):
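
The hunk above is part of annotate(): parent_map is already loaded, so the new code wraps it in a DictParentsProvider and builds a Graph over the in-memory dict instead of querying the store again. A hedged standalone example of that pattern (assumes a bzrlib source tree is importable; the revision keys are made up):

    from bzrlib import graph as _mod_graph

    # Key -> tuple-of-parent-keys, as annotate() would have loaded it.
    parent_map = {('rev-1',): (),
                  ('rev-2',): (('rev-1',),)}
    graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(parent_map))
    # heads() now resolves from the dict: ('rev-2',) descends from
    # ('rev-1',), so only ('rev-2',) is a head.
    print(graph.heads([('rev-1',), ('rev-2',)]))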
@@ -1543,7 +1563,7 @@
     """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""
 
     def __init__(self, graph_index, is_locked, parents=True,
-        add_callback=None):
+        add_callback=None, track_external_parent_refs=False):
         """Construct a _GCGraphIndex on a graph_index.
 
         :param graph_index: An implementation of bzrlib.index.GraphIndex.
@@ -1554,12 +1574,19 @@
         :param add_callback: If not None, allow additions to the index and call
             this callback with a list of added GraphIndex nodes:
             [(node, value, node_refs), ...]
+        :param track_external_parent_refs: As keys are added, keep track of the
+            keys they reference, so that we can query get_missing_parents(),
+            etc.
         """
         self._add_callback = add_callback
         self._graph_index = graph_index
         self._parents = parents
         self.has_graph = parents
         self._is_locked = is_locked
+        if track_external_parent_refs:
+            self._key_dependencies = knit._KeyRefs()
+        else:
+            self._key_dependencies = None
 
     def add_records(self, records, random_id=False):
         """Add multiple records to the index.
@@ -1610,6 +1637,11 @@
                 for key, (value, node_refs) in keys.iteritems():
                     result.append((key, value))
             records = result
+        key_dependencies = self._key_dependencies
+        if key_dependencies is not None and self._parents:
+            for key, value, refs in records:
+                parents = refs[0]
+                key_dependencies.add_references(key, parents)
         self._add_callback(records)
 
     def _check_read(self):
@@ -1664,6 +1696,14 @@
                 result[node[1]] = None
         return result
 
+    def get_missing_parents(self):
+        """Return the keys of missing parents."""
+        # Copied from _KnitGraphIndex.get_missing_parents
+        # We may have false positives, so filter those out.
+        self._key_dependencies.add_keys(
+            self.get_parent_map(self._key_dependencies.get_unsatisfied_refs()))
+        return frozenset(self._key_dependencies.get_unsatisfied_refs())
+
     def get_build_details(self, keys):
         """Get the various build details for keys.
 
@@ -1715,6 +1755,23 @@
         delta_end = int(bits[3])
         return node[0], start, stop, basis_end, delta_end
 
+    def scan_unvalidated_index(self, graph_index):
+        """Inform this _GCGraphIndex that there is an unvalidated index.
+
+        This allows this _GCGraphIndex to keep track of any missing
+        compression parents we may want to have filled in to make those
+        indices valid.
+
+        :param graph_index: A GraphIndex
+        """
+        if self._key_dependencies is not None:
+            # Add parent refs from graph_index (and discard parent refs that
+            # the graph_index has).
+            add_refs = self._key_dependencies.add_references
+            for node in graph_index.iter_all_entries():
+                add_refs(node[1], node[3][0])
+
+
 
 from bzrlib._groupcompress_py import (
     apply_delta,
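
Taken together, the new methods give pack-handling code a way to fill in missing compression parents: scan_unvalidated_index() registers what an unvalidated pack references, and get_missing_parents() reports what still has to be fetched. A hedged sketch of the caller-side flow (index and new_graph_index are assumed setup objects, not shown here):

    # Register a pack whose compression parents may not be present yet.
    index.scan_unvalidated_index(new_graph_index)
    # Ask which referenced keys are still absent; false positives are
    # filtered via get_parent_map(), as get_missing_parents() does above.
    missing = index.get_missing_parents()
    # Fetch/insert 'missing' before treating the new pack as valid.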