~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_groupcompress_py.py

Committer: Robert J. Tanner
Date: 2009-04-30 22:40:42 UTC
mfrom: (4323 +trunk)
mto: This revision was merged to the branch mainline in revision 4324.
Revision ID: tanner@real-time.com-20090430224042-53v45axtue5bw45l

Merge 1.14.1 back to trunk

files added:
bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_reference.py

files removed:
bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

files modified:
Makefile

NEWS

bzrlib/__init__.py

bzrlib/_groupcompress_py.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/config.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/foreign.py

bzrlib/groupcompress.py

bzrlib/hashcache.py

bzrlib/help_topics/en/rules.txt

bzrlib/hooks.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/log.py

bzrlib/lru_cache.py

bzrlib/mail_client.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/account.py

bzrlib/plugins/netrc_credential_store/__init__.py

bzrlib/push.py

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/message.py

bzrlib/smart/request.py

bzrlib/smtp_connection.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_create_clone.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_config.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_http.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_get_symlink_target.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_path_content_summary.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/tree_implementations/test_walkdirs.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/ssh.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/urlutils.py

bzrlib/util/configobj/configobj.py

bzrlib/versionedfile.py

doc/developers/HACKING.txt

doc/en/user-guide/installing_bazaar.txt

Show diffs side-by-side

added added

removed removed

bzrlib/_groupcompress_py.py

from bzrlib import osutils

class _OutputHandler(object):

"""A simple class which just tracks how to split up an insert request."""

def __init__(self, out_lines, index_lines, min_len_to_index):

self.out_lines = out_lines

self.index_lines = index_lines

self.min_len_to_index = min_len_to_index

self.cur_insert_lines = []

self.cur_insert_len = 0

def add_copy(self, start_byte, end_byte):

# The data stream allows >64kB in a copy, but to match the compiled

# code, we will also limit it to a 64kB copy

for start_byte in xrange(start_byte, end_byte, 64*1024):

num_bytes = min(64*1024, end_byte - start_byte)

copy_bytes = encode_copy_instruction(start_byte, num_bytes)

self.out_lines.append(copy_bytes)

self.index_lines.append(False)

def _flush_insert(self):

if not self.cur_insert_lines:

return

if self.cur_insert_len > 127:

raise AssertionError('We cannot insert more than 127 bytes'

' at a time.')

self.out_lines.append(chr(self.cur_insert_len))

self.index_lines.append(False)

self.out_lines.extend(self.cur_insert_lines)

if self.cur_insert_len < self.min_len_to_index:

self.index_lines.extend([False]*len(self.cur_insert_lines))

else:

self.index_lines.extend([True]*len(self.cur_insert_lines))

self.cur_insert_lines = []

self.cur_insert_len = 0

def _insert_long_line(self, line):

# Flush out anything pending

self._flush_insert()

line_len = len(line)

for start_index in xrange(0, line_len, 127):

next_len = min(127, line_len - start_index)

self.out_lines.append(chr(next_len))

self.index_lines.append(False)

self.out_lines.append(line[start_index:start_index+next_len])

# We don't index long lines, because we won't be able to match

# a line split across multiple inserts anway

self.index_lines.append(False)

def add_insert(self, lines):

if self.cur_insert_lines != []:

raise AssertionError('self.cur_insert_lines must be empty when'

' adding a new insert')

for line in lines:

if len(line) > 127:

self._insert_long_line(line)

else:

next_len = len(line) + self.cur_insert_len

if next_len > 127:

# Adding this line would overflow, so flush, and start over

self._flush_insert()

self.cur_insert_lines = [line]

self.cur_insert_len = len(line)

else:

self.cur_insert_lines.append(line)

self.cur_insert_len = next_len

self._flush_insert()

class LinesDeltaIndex(object):

"""This class indexes matches between strings.

101

:ivar endpoint: The total number of bytes in self.line_offsets

102

"""

103

104

_MIN_MATCH_BYTES = 10

105

_SOFT_MIN_MATCH_BYTES = 200

106

107

def __init__(self, lines):

108

self.lines = []

109

self.line_offsets = []

121

for idx, do_index in enumerate(index):

122

if not do_index:

123

continue

matches.setdefault(new_lines[idx], []).append(start_idx + idx)

124

line = new_lines[idx]

125

try:

126

matches[line].add(start_idx + idx)

127

except KeyError:

128

matches[line] = set([start_idx + idx])

129

130

def get_matches(self, line):

131

"""Return the lines which match the line in right."""

134

except KeyError:

135

return None

136

def _get_longest_match(self, lines, pos, locations):

137

def _get_longest_match(self, lines, pos):

138

"""Look at all matches for the current line, return the longest.

139

140

:param lines: The lines we are matching against

149

"""

150

range_start = pos

151

range_len = 0

copy_ends = None

152

prev_locations = None

153

max_pos = len(lines)

154

matching = self._matching_lines

155

while pos < max_pos:

if locations is None:

# TODO: is try/except better than get(..., None)?

try:

locations = self._matching_lines[lines[pos]]

except KeyError:

locations = None

if locations is None:

156

try:

157

locations = matching[lines[pos]]

158

except KeyError:

159

# No more matches, just return whatever we have, but we know

160

# that this last position is not going to match anything

161

pos += 1

162

break

163

# We have a match

164

if prev_locations is None:

165

# This is the first match in a range

166

prev_locations = locations

167

range_len = 1

168

locations = None # Consumed

169

else:

# We have a match

if copy_ends is None:

# This is the first match in a range

copy_ends = [loc + 1 for loc in locations]

range_len = 1

170

# We have a match started, compare to see if any of the

171

# current matches can be continued

172

next_locations = locations.intersection([loc + 1 for loc

173

in prev_locations])

174

if next_locations:

175

# At least one of the regions continues to match

176

prev_locations = set(next_locations)

177

range_len += 1

178

locations = None # Consumed

179

else:

# We have a match started, compare to see if any of the

100

# current matches can be continued

101

next_locations = set(copy_ends).intersection(locations)

102

if next_locations:

103

# At least one of the regions continues to match

104

copy_ends = [loc + 1 for loc in next_locations]

105

range_len += 1

106

locations = None # Consumed

107

else:

108

# All current regions no longer match.

109

# This line does still match something, just not at the

110

# end of the previous matches. We will return locations

111

# so that we can avoid another _matching_lines lookup.

112

break

180

# All current regions no longer match.

181

# This line does still match something, just not at the

182

# end of the previous matches. We will return locations

183

# so that we can avoid another _matching_lines lookup.

184

break

113

185

pos += 1

114

if copy_ends is None:

186

if prev_locations is None:

115

187

# We have no matches, this is a pure insert

116

return None, pos, locations

117

return (((min(copy_ends) - range_len, range_start, range_len)),

118

pos, locations)

188

return None, pos

189

smallest = min(prev_locations)

190

return (smallest - range_len + 1, range_start, range_len), pos

119

191

120

192

def get_matching_blocks(self, lines, soft=False):

121

193

"""Return the ranges in lines which match self.lines.

133

205

# instructions.

134

206

result = []

135

207

pos = 0

136

locations = None

137

208

max_pos = len(lines)

138

209

result_append = result.append

139

min_match_bytes = 10

210

min_match_bytes = self._MIN_MATCH_BYTES

140

211

if soft:

141

min_match_bytes = 200

212

min_match_bytes = self._SOFT_MIN_MATCH_BYTES

142

213

while pos < max_pos:

143

block, pos, locations = self._get_longest_match(lines, pos,

144

locations)

214

block, pos = self._get_longest_match(lines, pos)

145

215

if block is not None:

146

216

# Check to see if we match fewer than min_match_bytes. As we

147

217

# will turn this into a pure 'insert', rather than a copy.

217

287

# reserved for content type, content length

218

288

out_lines = ['', '', encode_base128_int(bytes_length)]

219

289

index_lines = [False, False, False]

290

output_handler = _OutputHandler(out_lines, index_lines,

291

self._MIN_MATCH_BYTES)

220

292

blocks = self.get_matching_blocks(new_lines, soft=soft)

221

293

current_line_num = 0

222

294

# We either copy a range (while there are reusable lines) or we

224

296

for old_start, new_start, range_len in blocks:

225

297

if new_start != current_line_num:

226

298

# non-matching region, insert the content

227

self._flush_insert(current_line_num, new_start,

228

new_lines, out_lines, index_lines)

299

output_handler.add_insert(new_lines[current_line_num:new_start])

229

300

current_line_num = new_start + range_len

230

301

if range_len:

231

self._flush_copy(old_start, range_len, out_lines, index_lines)

302

# Convert the line based offsets into byte based offsets

303

if old_start == 0:

304

first_byte = 0

305

else:

306

first_byte = self.line_offsets[old_start - 1]

307

last_byte = self.line_offsets[old_start + range_len - 1]

308

output_handler.add_copy(first_byte, last_byte)

232

309

return out_lines, index_lines

233

310

234

311

335

412

336

413

def make_delta(source_bytes, target_bytes):

337

414

"""Create a delta from source to target."""

338

# TODO: The checks below may not be in the right place yet.

339

415

if type(source_bytes) is not str:

340

416

raise TypeError('source is not a str')

341

417

if type(target_bytes) is not str:

Older »