~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/knit.py

Merge bzr.dev.

@@ -448,7 +448,6 @@
         """
         method, noeol = record_details
         if method == 'line-delta':
-            assert base_content is not None
             if copy_base_content:
                 content = base_content.copy()
             else:
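
Every hunk in this diff applies the same policy: bare `assert` statements are compiled away when Python runs under `-O`, so checks that guard real invariants become explicit `if`/`raise` blocks, while some asserts are simply deleted. A minimal standalone sketch, not bzrlib code, of why the rewrite matters:

    # Under `python -O`, __debug__ is False and assert statements are
    # stripped at compile time, so this check silently disappears.
    def check_with_assert(deferred):
        assert len(deferred) == 0, "compressed children still waiting"

    # The replacement pattern used throughout this diff survives -O
    # because it is an ordinary conditional raise.
    def check_explicitly(deferred):
        if not (len(deferred) == 0):
            raise AssertionError("compressed children still waiting")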
@@ -836,8 +835,8 @@
                 # put them in anywhere, but we hope that sending them soon
                 # after the fulltext will give good locality in the receiver
                 ready_to_send[:0] = deferred.pop(version_id)
-        assert len(deferred) == 0, \
-            "Still have compressed child versions waiting to be sent"
+        if not (len(deferred) == 0):
+            raise AssertionError("Still have compressed child versions waiting to be sent")
         # XXX: The stream format is such that we cannot stream it - we have to
         # know the length of all the data a-priori.
         raw_datum = []
@@ -846,8 +845,8 @@
             (version_id2, options, _, parents) in \
             izip(self._data.read_records_iter_raw(copy_queue_records),
                  temp_version_list):
-            assert version_id == version_id2, \
-                'logic error, inconsistent results'
+            if not (version_id == version_id2):
+                raise AssertionError('logic error, inconsistent results')
             raw_datum.append(raw_data)
             result_version_list.append(
                 (version_id, options, len(raw_data), parents))
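
The invariant preserved by this check is that records streamed back from disk stay aligned with the queue they were requested from. A plain-data sketch of the same pairing check, with lists standing in for the knit's record queue and `zip` for the `izip` pairing above:

    # Illustrative only: walk a request queue alongside the records
    # read back and fail loudly the moment they fall out of step.
    queued_ids = ['rev-1', 'rev-2', 'rev-3']
    read_back_ids = ['rev-1', 'rev-2', 'rev-3']
    for queued_id, read_id in zip(queued_ids, read_back_ids):
        if not (queued_id == read_id):
            raise AssertionError('logic error, inconsistent results')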
@@ -1038,23 +1037,24 @@
                         # We received a line-delta record for a non-delta knit.
                         # Convert it to a fulltext.
                         gzip_bytes = reader_callable(length)
-                        lines, sha1 = self._data._parse_record(
-                            version_id, gzip_bytes)
-                        delta = self.factory.parse_line_delta(lines,
-                                version_id)
-                        content = self.factory.make(
-                            self.get_lines(parents[0]), parents[0])
-                        content.apply_delta(delta, version_id)
-                        digest, len, content = self.add_lines(
-                            version_id, parents, content.text())
-                        if digest != sha1:
-                            raise errors.VersionedFileInvalidChecksum(version)
+                        self._convert_line_delta_to_fulltext(
+                            gzip_bytes, version_id, parents)
                         continue
 
                 self._add_raw_records(
                     [(version_id, options, parents, length)],
                     reader_callable(length))
 
+    def _convert_line_delta_to_fulltext(self, gzip_bytes, version_id, parents):
+        lines, sha1 = self._data._parse_record(version_id, gzip_bytes)
+        delta = self.factory.parse_line_delta(lines, version_id)
+        content = self.factory.make(self.get_lines(parents[0]), parents[0])
+        content.apply_delta(delta, version_id)
+        digest, len, content = self.add_lines(
+            version_id, parents, content.text())
+        if digest != sha1:
+            raise errors.VersionedFileInvalidChecksum(version_id)
+
     def _knit_from_datastream(self, (format, data_list, reader_callable)):
         """Create a knit object from a data stream.
 
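
Beyond removing duplication, extracting `_convert_line_delta_to_fulltext` fixes a latent bug visible above: the old inline code raised `errors.VersionedFileInvalidChecksum(version)`, but only `version_id` is bound at that point, so a genuine checksum mismatch would have surfaced as a `NameError` instead. A standalone sketch of the digest guard, with a direct sha1 computation standing in for the digest that `add_lines` returns:

    import hashlib

    def verify_fulltext(lines, expected_sha1, version_id):
        # Hedged stand-in: bzrlib gets this digest back from add_lines.
        digest = hashlib.sha1(''.join(lines).encode('utf-8')).hexdigest()
        if digest != expected_sha1:
            # bzrlib raises errors.VersionedFileInvalidChecksum(version_id)
            raise ValueError('invalid checksum for %s' % (version_id,))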
@@ -1362,7 +1362,6 @@
             # I/O and the time spend applying deltas.
             delta = self._check_should_delta(present_parents)
 
-        assert isinstance(version_id, str)
         content = self.factory.make(lines, version_id)
         if delta or (self.factory.annotated and len(present_parents) > 0):
             # Merge annotations from parent texts if needed.
@@ -1541,12 +1540,12 @@
             enumerate(self._data.read_records_iter(version_id_records)):
             pb.update('Walking content.', version_idx, total)
             method = self._index.get_method(version_id)
-
-            assert method in ('fulltext', 'line-delta')
             if method == 'fulltext':
                 line_iterator = self.factory.get_fulltext_content(data)
-            else:
+            elif method == 'line-delta':
                 line_iterator = self.factory.get_linedelta_content(data)
+            else:
+                raise ValueError('invalid method %r' % (method,))
             # XXX: It might be more efficient to yield (version_id,
             # line_iterator) in the future. However for now, this is a simpler
             # change to integrate into the rest of the codebase. RBC 20071110
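
The dispatch on `method` is now exhaustive: the up-front assert plus a catch-all `else:` is replaced by an explicit `elif` for `'line-delta'` and an `else:` that raises, so an unrecognized storage method fails at the point of dispatch instead of being silently decoded as a line-delta. The same shape in a minimal sketch with illustrative names:

    def get_content_iterator(method, data):
        if method == 'fulltext':
            return iter(data)
        elif method == 'line-delta':
            return iter(data)  # a real knit decodes the delta here
        else:
            # Unknown methods now fail loudly rather than fall through.
            raise ValueError('invalid method %r' % (method,))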
@@ -1863,8 +1862,6 @@
                                                pos,
                                                size,
                                                self._version_list_to_index(parents))
-                assert isinstance(line, str), \
-                    'content must be utf-8 encoded: %r' % (line,)
                 lines.append(line)
                 self._cache_version(version_id, options, pos, size, tuple(parents))
             if not self._need_to_create:
@@ -2130,7 +2127,6 @@
         compression_parents = an_entry[3][1]
         if not compression_parents:
             return None
-        assert len(compression_parents) == 1
         return compression_parents[0]
 
     def _get_method(self, node):
@@ -2317,8 +2313,6 @@
             tuple - (index, pos, length), where the index field is always None
             for the .knit access method.
         """
-        assert type(raw_data) == str, \
-            'data must be plain bytes was %s' % type(raw_data)
         if not self._need_to_create:
             base = self._transport.append_bytes(self._filename, raw_data)
         else:
@@ -2401,8 +2395,6 @@
             tuple - (index, pos, length), where the index field is the
             write_index object supplied to the PackAccess object.
         """
-        assert type(raw_data) == str, \
-            'data must be plain bytes was %s' % type(raw_data)
         result = []
         offset = 0
         for size in sizes:
@@ -2501,7 +2493,8 @@
         # use a generator for memory friendliness
         for from_backing_knit, version_id, start, end in memos_for_retrieval:
             if not from_backing_knit:
-                assert version_id is self.stream_index
+                if version_id is not self.stream_index:
+                    raise AssertionError()
                 yield self.data[start:end]
                 continue
             # we have been asked to thunk. This thunking only occurs when
@@ -2728,7 +2721,6 @@
                                      digest)],
             dense_lines or lines,
             ["end %s\n" % version_id]))
-        assert bytes.__class__ == str
         compressed_bytes = bytes_to_gzip(bytes)
         return len(compressed_bytes), compressed_bytes
 
@@ -2928,9 +2920,6 @@
 
     def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
         """See InterVersionedFile.join."""
-        assert isinstance(self.source, KnitVersionedFile)
-        assert isinstance(self.target, KnitVersionedFile)
-
         # If the source and target are mismatched w.r.t. annotations vs
         # plain, the data needs to be converted accordingly
         if self.source.factory.annotated == self.target.factory.annotated:
@@ -2982,9 +2971,12 @@
                     # * already have it or
                     # * have it scheduled already
                     # otherwise we don't care
-                    assert (self.target.has_version(parent) or
+                    if not (self.target.has_version(parent) or
                             parent in copy_set or
-                            not self.source.has_version(parent))
+                            not self.source.has_version(parent)):
+                        raise AssertionError("problem joining parent %r "
+                            "from %r to %r"
+                            % (parent, self.source, self.target))
                 index_memo = self.source._index.get_position(version_id)
                 copy_queue_records.append((version_id, index_memo))
                 copy_queue.append((version_id, options, parents))
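
Worth noting in this hunk: the asserted three-clause condition is negated wholesale with `if not (...)` rather than being rewritten clause by clause via De Morgan's laws, which keeps the diff minimal and each clause recognizable. Schematically, with illustrative callables:

    # Before: assert (A or B or not C), never evaluated under -O.
    # After:  if not (A or B or not C): raise AssertionError(...)
    def check_parent(parent, target_has, source_has, copy_set):
        if not (target_has(parent) or
                parent in copy_set or
                not source_has(parent)):
            raise AssertionError('problem joining parent %r' % (parent,))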
@@ -2999,7 +2991,8 @@
                 (version_id2, options, parents) in \
                 izip(self.source._data.read_records_iter_raw(copy_queue_records),
                      copy_queue):
-                assert version_id == version_id2, 'logic error, inconsistent results'
+                if not (version_id == version_id2):
+                    raise AssertionError('logic error, inconsistent results')
                 count = count + 1
                 pb.update("Joining knit", count, total)
                 if converter:
@@ -3048,9 +3041,6 @@
 
     def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
         """See InterVersionedFile.join."""
-        assert isinstance(self.source, bzrlib.weave.Weave)
-        assert isinstance(self.target, KnitVersionedFile)
-
         version_ids = self._get_source_version_ids(version_ids, ignore_missing)
 
         if not version_ids:
@@ -3082,7 +3072,9 @@
                 # check that its will be a consistent copy:
                 for parent in parents:
                     # if source has the parent, we must already have it
-                    assert (self.target.has_version(parent))
+                    if not self.target.has_version(parent):
+                        raise AssertionError("%r does not have parent %r"
+                            % (self.target, parent))
                 self.target.add_lines(
                     version_id, parents, self.source.get_lines(version_id))
                 count = count + 1
@@ -3258,10 +3250,12 @@
                 # add a key, no parents
                 self._revision_id_graph[missing_version] = ()
                 pending.discard(missing_version) # don't look for it
-        # XXX: This should probably be a real exception, as it is a data
-        #      inconsistency
-        assert not self._ghosts.intersection(self._compression_children), \
-            "We cannot have nodes which have a compression parent of a ghost."
+        if self._ghosts.intersection(self._compression_children):
+            raise KnitCorrupt(
+                "We cannot have nodes which have a ghost compression parent:\n"
+                "ghosts: %r\n"
+                "compression children: %r"
+                % (self._ghosts, self._compression_children))
         # Cleanout anything that depends on a ghost so that we don't wait for
         # the ghost to show up
         for node in self._ghosts:
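
The removed XXX comment had already asked for a real exception here, and the replacement obliges: it raises `KnitCorrupt` and interpolates both the ghost set and the compression-children map, so the corruption report names the offending nodes. A plain-set sketch of the consistency rule (a ghost is a referenced-but-absent revision, and no stored node may use one as its compression basis):

    # Illustrative structures standing in for the knit's indices.
    ghosts = set(['rev-b'])                      # referenced, not stored
    compression_children = {'rev-a': ['rev-c']}  # basis -> delta children

    bad = ghosts.intersection(compression_children)
    if bad:
        # bzrlib raises KnitCorrupt; RuntimeError stands in for it here.
        raise RuntimeError('nodes with a ghost compression parent: %r'
                           % (sorted(bad),))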
@@ -3295,7 +3289,6 @@
             if len(parent_ids) == 0:
                 # There are no parents for this node, so just add it
                 # TODO: This probably needs to be decoupled
-                assert compression_parent is None
                 fulltext_content, delta = self._knit.factory.parse_record(
                     rev_id, record, record_details, None)
                 fulltext = self._add_fulltext_content(rev_id, fulltext_content)
@@ -3312,7 +3305,9 @@
                  record_details) = self._all_build_details[rev_id]
                 if compression_parent is not None:
                     comp_children = self._compression_children[compression_parent]
-                    assert rev_id in comp_children
+                    if rev_id not in comp_children:
+                        raise AssertionError("%r not in compression children %r"
+                            % (rev_id, comp_children))
                     # If there is only 1 child, it is safe to reuse this
                     # content
                     reuse_content = (len(comp_children) == 1