~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to groupcompress.py

Committer: John Arbash Meinel
Date: 2009-02-19 20:45:00 UTC
mto: (0.22.4 experimental)
mto: This revision was merged to the branch mainline in revision 4280.
Revision ID: john@arbash-meinel.com-20090219204500-1wb0k8f962lansy6

start experimenting with gc-optimal ordering.

files modified:
groupcompress.py

repofmt.py

Show diffs side-by-side

added added

removed removed

groupcompress.py

result.append((op, None, numbers[0], contents))

return label, sha1, result

def apply_delta(basis, delta):

"""Apply delta to this object to become new_version_id."""

lines = []

lines[-1] = lines[-1][:-1]

100

101

102

def sort_gc_optimal(parent_map):

103

"""Sort and group the keys in parent_map into gc-optimal order.

104

105

gc-optimal is defined (currently) as reverse-topological order, grouped by

106

the key prefix.

107

108

:return: A sorted-list of keys

109

"""

110

# gc-optimal ordering is approximately reverse topological,

111

# properly grouped by file-id.

112

per_prefix_map = {'': []}

113

present_keys = []

114

for item in parent_map.iteritems():

115

key = item[0]

116

if isinstance(key, str) or len(key) == 1:

117

per_prefix_map[''].append(item)

118

else:

119

try:

120

per_prefix_map[key[0]].append(item)

121

except KeyError:

122

per_prefix_map[key[0]] = [item]

123

124

for prefix in sorted(per_prefix_map):

125

present_keys.extend(reversed(topo_sort(per_prefix_map[prefix])))

126

return present_keys

127

128

100

129

class GroupCompressor(object):

101

130

"""Produce a serialised group of compressed texts.

102

131

145

174

# insert new lines. To find reusable lines we traverse

146

175

locations = None

147

176

max_pos = len(lines)

148

max_time = 0.0

149

max_info = None

150

177

result_append = result.append

151

178

while pos < max_pos:

152

179

block, pos, locations = _get_longest_match(line_locations, pos,

481

508

parent_map[key] = self._unadded_refs[key]

482

509

present_keys = topo_sort(parent_map)

483

510

# Now group by source:

484

# TODO: Support a group-compress 'optimal' ordering

511

elif ordering == 'gc-optimal':

512

parent_map = dict((key, details[2]) for key, details in

513

locations.iteritems())

514

for key in local_keys:

515

parent_map[key] = self._unadded_refs[key]

516

# XXX: This only optimizes for the target ordering. We may need to

517

# balance that with the time it takes to extract ordering, by

518

# somehow grouping based on locations[key][0:3]

519

present_keys = sort_gc_optimal(parent_map)

485

520

else:

486

521

# We want to yield the keys in a semi-optimal (read-wise) ordering.

487

522

# Otherwise we thrash the _group_cache and destroy performance

488

523

def get_group(key):

489

# This is the group the bytes are stored in

490

return locations[key][0:3]

524

# This is the group the bytes are stored in, followed by the

525

# location in the group

526

return locations[key][0]

491

527

present_keys = sorted(locations.iterkeys(), key=get_group)

492

528

# We don't have an ordering for keys in the in-memory object, but

493

529

# lets process the in-memory ones first.

Older »