~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Canonical.com Patch Queue Manager
Date: 2009-06-22 17:11:20 UTC
mfrom: (4398.8.10 1.16-commit-fulltext)
Revision ID: pqm@pqm.ubuntu.com-20090622171120-fuxez9ylfqpxynqn

(jam) Add VF._add_text and reduce memory overhead during commit (see
bug #109114)

files modified:
NEWS

bzrlib/groupcompress.py

bzrlib/knit.py

bzrlib/repository.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_versionedfile.py

bzrlib/tuned_gzip.py

bzrlib/versionedfile.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

from cStringIO import StringIO

from itertools import izip, chain

from itertools import izip

import operator

import os

import sys

686

content = knit._get_content(key)

687

# adjust for the fact that serialised annotations are only key suffixes

688

# for this factory.

689

if type(key) == tuple:

689

if type(key) is tuple:

690

prefix = key[:-1]

691

origins = content.annotate()

692

result = []

909

# indexes can't directly store that, so we give them

910

# an empty tuple instead.

911

parents = ()

912

line_bytes = ''.join(lines)

912

913

return self._add(key, lines, parents,

913

parent_texts, left_matching_blocks, nostore_sha, random_id)

914

parent_texts, left_matching_blocks, nostore_sha, random_id,

915

line_bytes=line_bytes)

916

917

def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):

918

"""See VersionedFiles.add_text()."""

919

self._index._check_write_ok()

920

self._check_add(key, None, random_id, check_content=False)

921

if text.__class__ is not str:

922

raise errors.BzrBadParameterUnicode("text")

923

if parents is None:

924

# The caller might pass None if there is no graph data, but kndx

925

# indexes can't directly store that, so we give them

926

# an empty tuple instead.

927

parents = ()

928

return self._add(key, None, parents,

929

None, None, nostore_sha, random_id,

930

line_bytes=text)

914

931

915

932

def _add(self, key, lines, parents, parent_texts,

916

left_matching_blocks, nostore_sha, random_id):

933

left_matching_blocks, nostore_sha, random_id,

934

line_bytes):

917

935

"""Add a set of lines on top of version specified by parents.

918

936

919

937

Any versions not present will be converted into ghosts.

938

939

:param lines: A list of strings where each one is a single line (has a

940

single newline at the end of the string). This is now optional

941

(callers can pass None). It is left in its location for backwards

942

compatibility. If supplied, ''.join(lines) must == line_bytes

943

:param line_bytes: A single string containing the content

944

945

We pass both lines and line_bytes because different routes bring the

946

values to this function. And for memory efficiency, we don't want to

947

have to split/join on-demand.

920

948

"""

921

949

# first thing, if the content is something we don't need to store, find

922

950

# that out.

923

line_bytes = ''.join(lines)

924

951

digest = sha_string(line_bytes)

925

952

if nostore_sha == digest:

926

953

raise errors.ExistingContent

947

974

948

975

text_length = len(line_bytes)

949

976

options = []

950

if lines:

951

if lines[-1][-1] != '\n':

952

# copy the contents of lines.

977

no_eol = False

978

# Note: line_bytes is not modified to add a newline, that is tracked

979

# via the no_eol flag. 'lines' *is* modified, because it holds the

980

# general values needed by the Content code.

981

if line_bytes and line_bytes[-1] != '\n':

982

options.append('no-eol')

983

no_eol = True

984

# Copy the existing list, or create a new one

985

if lines is None:

986

lines = osutils.split_lines(line_bytes)

987

else:

953

988

lines = lines[:]

954

options.append('no-eol')

955

lines[-1] = lines[-1] + '\n'

956

line_bytes += '\n'

989

# Replace the last line with one that ends in a final newline

990

lines[-1] = lines[-1] + '\n'

991

if lines is None:

992

lines = osutils.split_lines(line_bytes)

957

993

958

994

for element in key[:-1]:

959

if type(element) != str:

995

if type(element) is not str:

960

996

raise TypeError("key contains non-strings: %r" % (key,))

961

997

if key[-1] is None:

962

998

key = key[:-1] + ('sha1:' + digest,)

963

elif type(key[-1]) != str:

999

elif type(key[-1]) is not str:

964

1000

raise TypeError("key contains non-strings: %r" % (key,))

965

1001

# Knit hunks are still last-element only

966

1002

version_id = key[-1]

967

1003

content = self._factory.make(lines, version_id)

968

if 'no-eol' in options:

1004

if no_eol:

969

1005

# Hint to the content object that its text() call should strip the

970

1006

# EOL.

971

1007

content._should_strip_eol = True

986

1022

if self._factory.__class__ is KnitPlainFactory:

987

1023

# Use the already joined bytes saving iteration time in

988

1024

# _record_to_data.

1025

dense_lines = [line_bytes]

1026

if no_eol:

1027

dense_lines.append('\n')

989

1028

size, bytes = self._record_to_data(key, digest,

990

lines, [line_bytes])

1029

lines, dense_lines)

991

1030

else:

992

1031

# get mixed annotation + content and feed it into the

993

1032

# serialiser.

1920

1959

function spends less time resizing the final string.

1921

1960

:return: (len, a StringIO instance with the raw data ready to read.)

1922

1961

"""

1923

# Note: using a string copy here increases memory pressure with e.g.

1924

# ISO's, but it is about 3 seconds faster on a 1.2Ghz intel machine

1925

# when doing the initial commit of a mozilla tree. RBC 20070921

1926

bytes = ''.join(chain(

1927

["version %s %d %s\n" % (key[-1],

1928

len(lines),

1929

digest)],

1930

dense_lines or lines,

1931

["end %s\n" % key[-1]]))

1932

if type(bytes) != str:

1933

raise AssertionError(

1934

'data must be plain bytes was %s' % type(bytes))

1962

chunks = ["version %s %d %s\n" % (key[-1], len(lines), digest)]

1963

chunks.extend(dense_lines or lines)

1964

chunks.append("end %s\n" % key[-1])

1965

for chunk in chunks:

1966

if type(chunk) is not str:

1967

raise AssertionError(

1968

'data must be plain bytes was %s' % type(chunk))

1935

1969

if lines and lines[-1][-1] != '\n':

1936

1970

raise ValueError('corrupt lines value %r' % lines)

1937

compressed_bytes = tuned_gzip.bytes_to_gzip(bytes)

1971

compressed_bytes = tuned_gzip.chunks_to_gzip(chunks)

1938

1972

return len(compressed_bytes), compressed_bytes

1939

1973

1940

1974

def _split_header(self, line):

2375

2409

line = "\n%s %s %s %s %s :" % (

2376

2410

key[-1], ','.join(options), pos, size,

2377

2411

self._dictionary_compress(parents))

2378

if type(line) != str:

2412

if type(line) is not str:

2379

2413

raise AssertionError(

2380

2414

'data must be utf8 was %s' % type(line))

2381

2415

lines.append(line)

2570

2604

result = set()

2571

2605

# Identify all key prefixes.

2572

2606

# XXX: A bit hacky, needs polish.

2573

if type(self._mapper) == ConstantMapper:

2607

if type(self._mapper) is ConstantMapper:

2574

2608

prefixes = [()]

2575

2609

else:

2576

2610

relpaths = set()

2608

2642

del self._history

2609

2643

except NoSuchFile:

2610

2644

self._kndx_cache[prefix] = ({}, [])

2611

if type(self._mapper) == ConstantMapper:

2645

if type(self._mapper) is ConstantMapper:

2612

2646

# preserve behaviour for revisions.kndx etc.

2613

2647

self._init_index(path)

2614

2648

del self._cache

3094

3128

opaque index memo. For _KnitKeyAccess the memo is (key, pos,

3095

3129

length), where the key is the record key.

3096

3130

"""

3097

if type(raw_data) != str:

3131

if type(raw_data) is not str:

3098

3132

raise AssertionError(

3099

3133

'data must be plain bytes was %s' % type(raw_data))

3100

3134

result = []

3183

3217

length), where the index field is the write_index object supplied

3184

3218

to the PackAccess object.

3185

3219

"""

3186

if type(raw_data) != str:

3220

if type(raw_data) is not str:

3187

3221

raise AssertionError(

3188

3222

'data must be plain bytes was %s' % type(raw_data))

3189

3223

result = []

Older »