~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Alexander Belchenko
Date: 2007-10-04 05:50:44 UTC
mfrom: (2881 +trunk)
mto: This revision was merged to the branch mainline in revision 2884.
Revision ID: bialix@ukr.net-20071004055044-pb88kgkfayawro8n

merge bzr.dev

files added:
bzrlib/tests/tree_implementations/test_path_content_summary.py

files modified:
.bzrignore

NEWS

bzrlib/__init__.py

bzrlib/builtins.py

bzrlib/bundle/serializer/v4.py

bzrlib/bzrdir.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/graph.py

bzrlib/index.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/memorytree.py

bzrlib/multiparent.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_rename_one.py

bzrlib/trace.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tuned_gzip.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/HACKING.txt

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

100

RevisionNotPresent,

101

RevisionAlreadyPresent,

102

)

103

from bzrlib.tuned_gzip import GzipFile

103

from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip

104

from bzrlib.osutils import (

105

contains_whitespace,

106

contains_linebreaks,

107

sha_string,

107

108

sha_strings,

108

109

)

109

110

from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed

253

254

def parse_line_delta_iter(self, lines):

254

255

return iter(self.parse_line_delta(lines))

255

256

def parse_line_delta(self, lines, version_id):

257

def parse_line_delta(self, lines, version_id, plain=False):

257

258

"""Convert a line based delta into internal representation.

258

259

260

line delta is in the form of:

262

263

revid(utf8) newline\n

263

264

internal representation is

264

265

(start, end, count, [1..count tuples (revid, newline)])

266

267

:param plain: If True, the lines are returned as a plain

268

list, not as a list of tuples, i.e.

269

(start, end, count, [1..count newline])

265

270

"""

266

271

result = []

267

272

lines = iter(lines)

273

278

return cache.setdefault(origin, origin), text

274

279

275

280

# walk through the lines parsing.

276

for header in lines:

277

start, end, count = [int(n) for n in header.split(',')]

278

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

279

result.append((start, end, count, contents))

281

# Note that the plain test is explicitly pulled out of the

282

# loop to minimise any performance impact

283

if plain:

284

for header in lines:

285

start, end, count = [int(n) for n in header.split(',')]

286

contents = [next().split(' ', 1)[1] for i in xrange(count)]

287

result.append((start, end, count, contents))

288

else:

289

for header in lines:

290

start, end, count = [int(n) for n in header.split(',')]

291

contents = [tuple(next().split(' ', 1)) for i in xrange(count)]

292

result.append((start, end, count, contents))

280

293

return result

281

294

282

295

def get_fulltext_content(self, lines):

820

833

"""See VersionedFile.add_lines_with_ghosts()."""

821

834

self._check_add(version_id, lines, random_id, check_content)

822

835

return self._add(version_id, lines, parents, self.delta,

823

parent_texts, None, nostore_sha)

836

parent_texts, None, nostore_sha, random_id)

824

837

825

838

def _add_lines(self, version_id, parents, lines, parent_texts,

826

839

left_matching_blocks, nostore_sha, random_id, check_content):

828

841

self._check_add(version_id, lines, random_id, check_content)

829

842

self._check_versions_present(parents)

830

843

return self._add(version_id, lines[:], parents, self.delta,

831

parent_texts, left_matching_blocks, nostore_sha)

844

parent_texts, left_matching_blocks, nostore_sha, random_id)

832

845

833

846

def _check_add(self, version_id, lines, random_id, check_content):

834

847

"""check that version_id and lines are safe to add."""

846

859

self._check_lines_are_lines(lines)

847

860

848

861

def _add(self, version_id, lines, parents, delta, parent_texts,

849

left_matching_blocks, nostore_sha):

862

left_matching_blocks, nostore_sha, random_id):

850

863

"""Add a set of lines on top of version specified by parents.

851

864

852

865

If delta is true, compress the text as a line-delta against

854

867

855

868

Any versions not present will be converted into ghosts.

856

869

"""

857

# 461 0 6546.0390 43.9100 bzrlib.knit:489(_add)

858

# +400 0 889.4890 418.9790 +bzrlib.knit:192(lower_fulltext)

859

# +461 0 1364.8070 108.8030 +bzrlib.knit:996(add_record)

860

# +461 0 193.3940 41.5720 +bzrlib.knit:898(add_version)

861

# +461 0 134.0590 18.3810 +bzrlib.osutils:361(sha_strings)

862

# +461 0 36.3420 15.4540 +bzrlib.knit:146(make)

863

# +1383 0 8.0370 8.0370 +<len>

864

# +61 0 13.5770 7.9190 +bzrlib.knit:199(lower_line_delta)

865

# +61 0 963.3470 7.8740 +bzrlib.knit:427(_get_content)

866

# +61 0 973.9950 5.2950 +bzrlib.knit:136(line_delta)

867

# +61 0 1918.1800 5.2640 +bzrlib.knit:359(_merge_annotations)

870

# first thing, if the content is something we don't need to store, find

871

# that out.

872

line_bytes = ''.join(lines)

873

digest = sha_string(line_bytes)

874

if nostore_sha == digest:

875

raise errors.ExistingContent

868

876

869

877

present_parents = []

870

878

if parent_texts is None:

879

887

present_parents[0] != parents[0])):

880

888

delta = False

881

889

882

digest = sha_strings(lines)

883

if nostore_sha == digest:

884

raise errors.ExistingContent

885

text_length = sum(map(len, lines))

890

text_length = len(line_bytes)

886

891

options = []

887

892

if lines:

888

893

if lines[-1][-1] != '\n':

908

913

if delta:

909

914

options.append('line-delta')

910

915

store_lines = self.factory.lower_line_delta(delta_hunks)

916

size, bytes = self._data._record_to_data(version_id, digest,

917

store_lines)

911

918

else:

912

919

options.append('fulltext')

920

# get mixed annotation + content and feed it into the

921

# serialiser.

913

922

store_lines = self.factory.lower_fulltext(content)

923

size, bytes = self._data._record_to_data(version_id, digest,

924

store_lines)

914

925

915

access_memo = self._data.add_record(version_id, digest, store_lines)

916

self._index.add_version(version_id, options, access_memo, parents)

926

access_memo = self._data.add_raw_records([size], bytes)[0]

927

self._index.add_versions(

928

((version_id, options, access_memo, parents),),

929

random_id=random_id)

917

930

return digest, text_length, content

918

931

919

932

def check(self, progress_bar=None):

1022

1035

text_map[version_id] = text

1023

1036

return text_map, final_content

1024

1037

1038

@staticmethod

1039

def _apply_delta(lines, delta):

1040

"""Apply delta to lines."""

1041

lines = list(lines)

1042

offset = 0

1043

for start, end, count, delta_lines in delta:

1044

lines[offset+start:offset+end] = delta_lines

1045

offset = offset + (start - end) + count

1046

return lines

1047

1025

1048

def iter_lines_added_or_present_in_versions(self, version_ids=None,

1026

1049

pb=None):

1027

1050

"""See VersionedFile.iter_lines_added_or_present_in_versions()."""

1359

1382

"""Add a version record to the index."""

1360

1383

self.add_versions(((version_id, options, index_memo, parents),))

1361

1384

1362

def add_versions(self, versions):

1385

def add_versions(self, versions, random_id=False):

1363

1386

"""Add multiple versions to the index.

1364

1387

1365

1388

:param versions: a list of tuples:

1366

1389

(version_id, options, pos, size, parents).

1390

:param random_id: If True the ids being added were randomly generated

1391

and no check for existence will be performed.

1367

1392

"""

1368

1393

lines = []

1369

1394

orig_history = self._history[:]

1699

1724

"""Add a version record to the index."""

1700

1725

return self.add_versions(((version_id, options, access_memo, parents),))

1701

1726

1702

def add_versions(self, versions):

1727

def add_versions(self, versions, random_id=False):

1703

1728

"""Add multiple versions to the index.

1704

1729

1705

1730

This function does not insert data into the Immutable GraphIndex

1709

1734

1710

1735

:param versions: a list of tuples:

1711

1736

(version_id, options, pos, size, parents).

1737

:param random_id: If True the ids being added were randomly generated

1738

and no check for existence will be performed.

1712

1739

"""

1713

1740

if not self._add_callback:

1714

1741

raise errors.ReadOnlyError(self)

1743

1770

"in parentless index.")

1744

1771

node_refs = ()

1745

1772

keys[key] = (value, node_refs)

1746

present_nodes = self._get_entries(keys)

1747

for (index, key, value, node_refs) in present_nodes:

1748

if (value, node_refs) != keys[key]:

1749

raise KnitCorrupt(self, "inconsistent details in add_versions"

1750

": %s %s" % ((value, node_refs), keys[key]))

1751

del keys[key]

1773

if not random_id:

1774

present_nodes = self._get_entries(keys)

1775

for (index, key, value, node_refs) in present_nodes:

1776

if (value, node_refs) != keys[key]:

1777

raise KnitCorrupt(self, "inconsistent details in add_versions"

1778

": %s %s" % ((value, node_refs), keys[key]))

1779

del keys[key]

1752

1780

result = []

1753

1781

if self._parents:

1754

1782

for key, (value, node_refs) in keys.iteritems():

1968

1996

1969

1997

:return: (len, a StringIO instance with the raw data ready to read.)

1970

1998

"""

1971

sio = StringIO()

1972

data_file = GzipFile(None, mode='wb', fileobj=sio,

1973

compresslevel=Z_DEFAULT_COMPRESSION)

1974

1975

assert isinstance(version_id, str)

1976

data_file.writelines(chain(

1999

bytes = (''.join(chain(

1977

2000

["version %s %d %s\n" % (version_id,

1978

2001

len(lines),

1979

2002

digest)],

1980

2003

lines,

1981

["end %s\n" % version_id]))

1982

data_file.close()

1983

length= sio.tell()

1984

1985

sio.seek(0)

1986

return length, sio

2004

["end %s\n" % version_id])))

2005

assert bytes.__class__ == str

2006

compressed_bytes = bytes_to_gzip(bytes)

2007

return len(compressed_bytes), compressed_bytes

1987

2008

1988

2009

def add_raw_records(self, sizes, raw_data):

1989

2010

"""Append a prepared record to the data file.

1996

2017

"""

1997

2018

return self._access.add_raw_records(sizes, raw_data)

1998

2019

1999

def add_record(self, version_id, digest, lines):

2000

"""Write new text record to disk.

2001

2002

Returns index data for retrieving it later, as per add_raw_records.

2003

"""

2004

size, sio = self._record_to_data(version_id, digest, lines)

2005

result = self.add_raw_records([size], sio.getvalue())

2006

if self._do_cache:

2007

self._cache[version_id] = sio.getvalue()

2008

return result[0]

2009

2010

2020

def _parse_record_header(self, version_id, raw_data):

2011

2021

"""Parse a record header for consistency.

2012

2022

2173

2183

assert isinstance(self.source, KnitVersionedFile)

2174

2184

assert isinstance(self.target, KnitVersionedFile)

2175

2185

2186

# If the source and target are mismatched w.r.t. annotations vs

2187

# plain, the data needs to be converted accordingly

2188

if self.source.factory.annotated == self.target.factory.annotated:

2189

converter = None

2190

elif self.source.factory.annotated:

2191

converter = self._anno_to_plain_converter

2192

else:

2193

# We're converting from a plain to an annotated knit. This requires

2194

# building the annotations from scratch. The generic join code

2195

# handles this implicitly so we delegate to it.

2196

return super(InterKnit, self).join(pb, msg, version_ids,

2197

ignore_missing)

2198

2176

2199

version_ids = self._get_source_version_ids(version_ids, ignore_missing)

2177

2178

2200

if not version_ids:

2179

2201

return 0

2180

2202

2232

2254

assert version_id == version_id2, 'logic error, inconsistent results'

2233

2255

count = count + 1

2234

2256

pb.update("Joining knit", count, total)

2235

raw_records.append((version_id, options, parents, len(raw_data)))

2257

if converter:

2258

size, raw_data = converter(raw_data, version_id, options,

2259

parents)

2260

else:

2261

size = len(raw_data)

2262

raw_records.append((version_id, options, parents, size))

2236

2263

raw_datum.append(raw_data)

2237

2264

self.target._add_raw_records(raw_records, ''.join(raw_datum))

2238

2265

return count

2239

2266

finally:

2240

2267

pb.finished()

2241

2268

2269

def _anno_to_plain_converter(self, raw_data, version_id, options,

2270

parents):

2271

"""Convert annotated content to plain content."""

2272

data, digest = self.source._data._parse_record(version_id, raw_data)

2273

if 'fulltext' in options:

2274

content = self.source.factory.parse_fulltext(data, version_id)

2275

lines = self.target.factory.lower_fulltext(content)

2276

else:

2277

delta = self.source.factory.parse_line_delta(data, version_id,

2278

plain=True)

2279

lines = self.target.factory.lower_line_delta(delta)

2280

return self.target._data._record_to_data(version_id, digest, lines)

2281

2242

2282

2243

2283

InterVersionedFile.register_optimiser(InterKnit)

2244

2284

Older »