~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/pack.py

Committer: Martin Pool
Date: 2011-11-29 09:13:54 UTC
mto: This revision was merged to the branch mainline in revision 6329.
Revision ID: mbp@canonical.com-20111129091354-zcwnzn3cy1jfzqju

ContainerWriter: Avoid one possible large-string join

files modified:
bzrlib/pack.py

bzrlib/tests/test_pack.py

Show diffs side-by-side

added added

removed removed

bzrlib/pack.py

"""Return the bytes to finish a container."""

return "E"

def bytes_record(self, bytes, names):

"""Return the bytes for a Bytes record with the given name and

contents.

"""

def bytes_header(self, length, names):

"""Return the header for a Bytes record."""

# Kind marker

byte_sections = ["B"]

# Length

byte_sections.append(str(len(bytes)) + "\n")

byte_sections.append(str(length) + "\n")

# Names

for name_tuple in names:

# Make sure we're writing valid names. Note that we will leave a

byte_sections.append('\x00'.join(name_tuple) + "\n")

# End of headers

byte_sections.append("\n")

# Finally, the contents.

byte_sections.append(bytes)

# XXX: This causes a memory copy of bytes in size, but is usually

# faster than two write calls (12 vs 13 seconds to output a gig of

# 1k records.) - results may differ on significantly larger records

100

# like .iso's but as they should be rare in any case and thus not

101

# likely to be the common case. The biggest issue is causing extreme

102

# memory pressure in that case. One possible improvement here is to

103

# check the size of the content before deciding to join here vs call

104

# write twice.

105

return ''.join(byte_sections)

106

def bytes_record(self, bytes, names):

"""Return the bytes for a Bytes record with the given name and

contents.

If the content may be large, construct the header separately and then

100

stream out the contents.

101

"""

102

return self.bytes_header(len(bytes), names) + bytes

103

107

104

108

105

class ContainerWriter(object):

109

106

"""A class for writing containers to a file.

113

110

introduced by the begin() and end() methods.

114

111

"""

115

112

113

# Join up headers with the body if writing fewer than this many bytes:

114

# trades off memory usage and copying to do less IO ops.

115

_JOIN_WRITES_THRESHOLD = 100000

116

117

def __init__(self, write_func):

117

118

"""Constructor.

118

119

151

152

and thus are only suitable for use by a ContainerReader.

152

153

"""

153

154

current_offset = self.current_offset

154

serialised_record = self._serialiser.bytes_record(bytes, names)

155

self.write_func(serialised_record)

155

length = len(bytes)

156

if length < self._JOIN_WRITES_THRESHOLD:

157

self.write_func(self._serialiser.bytes_header(length, names)

158

+ bytes)

159

else:

160

self.write_func(self._serialiser.bytes_header(length, names))

161

self.write_func(bytes)

156

162

self.records_written += 1

157

163

# return a memo of where we wrote data to allow random access.

158

164

return current_offset, self.current_offset - current_offset

Older »