~bzr-pqm/bzr/bzr.dev

Committer: John Arbash Meinel
Date: 2009-02-28 05:03:49 UTC
mto: (0.23.23 groupcompress_rabin)
mto: This revision was merged to the branch mainline in revision 4280.
Revision ID: john@arbash-meinel.com-20090228050349-5b5fljgovy1ylokx

Implement a 'FAST' mode.

If we insert a text and get a 'decent' delta, then we just keep using
that delta_index until we get a bad insert (delta > 1/2 of the text size).
In this mode 'bzr pack' drops from 2m41s => 53s. Inventory pages
are barely affected in size, while Text pages go from 8.2MB => 9.6MB.

files modified:
groupcompress.py

Show diffs side-by-side

added added

removed removed

groupcompress.py

)

_NO_LABELS = False

_FAST = False

def parse(bytes):

if _NO_LABELS:

131

132

self.endpoint = 0

132

133

self.input_bytes = 0

133

134

self.labels_deltas = {}

135

self._last_delta_index = None

134

136

135

137

def compress(self, key, chunks, expected_sha, soft=False):

136

138

"""Compress lines with label key.

167

169

else:

168

170

new_chunks = ['label: %s\nsha1: %s\n' % (label, sha1)]

169

171

# PROF: 5s to this constant extra joining

170

source_text = ''.join(self.lines)

171

# XXX: We have a few possibilities here. We could consider a few

172

# different 'previous' windows, such as only the initial text, we

173

# could do something with the 'just inserted' text

174

# we could try a delta against whatever the last delta we

175

# computed, (the idea being we just computed the delta_index, so

176

# we re-use it here, and see if that is good enough, etc)

177

# PROF: 15s to building the delta index

178

delta_index = _groupcompress_c.make_delta_index(source_text)

172

if self._last_delta_index is not None:

173

delta_index = self._last_delta_index

174

else:

175

source_text = ''.join(self.lines)

176

# XXX: We have a few possibilities here. We could consider a few

177

# different 'previous' windows, such as only the initial text,

178

# we could do something with the 'just inserted' text we could

179

# try a delta against whatever the last delta we computed,

180

# (the idea being we just computed the delta_index, so we

181

# re-use it here, and see if that is good enough, etc)

182

# PROF: 15s to building the delta index

183

delta_index = _groupcompress_c.make_delta_index(source_text)

179

184

# PROF: only 0.67s to actually create a delta

180

185

delta = delta_index.make_delta(target_text)

186

# if delta is None and delta_index is self._last_delta_index:

187

# # So this didn't compress very well, shall we try again with a

188

# # better delta_index?

189

# source_text = ''.join(self.lines)

190

# delta_index = _groupcompress_c.make_delta_index(source_text)

191

# delta = delta_index.make_delta(target_text)

181

192

if (delta is None

182

193

or len(delta) > len(target_text) / 2):

183

194

# We can't delta (perhaps source_text is empty)

189

200

new_chunks.insert(0, 'fulltext\n')

190

201

new_chunks.append('len: %s\n' % (input_len,))

191

202

new_chunks.extend(chunks)

203

self._last_delta_index = None

192

204

else:

193

205

if _NO_LABELS:

194

206

new_chunks = ['d', delta]

196

208

new_chunks.insert(0, 'delta\n')

197

209

new_chunks.append('len: %s\n' % (len(delta),))

198

210

new_chunks.append(delta)

211

if _FAST:

212

self._last_delta_index = delta_index

199

213

delta_start = (self.endpoint, len(self.lines))

200

214

self.output_chunks(new_chunks)

201

215

self.input_bytes += input_len

Older »