~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_groupcompress.py

Merge jam python groupcompress implementation

Show diffs side-by-side

added added

removed removed

Lines of Context:
26
26
    versionedfile,
27
27
    )
28
28
from bzrlib.osutils import sha_string
29
 
from bzrlib.tests import (
30
 
    TestCaseWithTransport,
31
 
    multiply_tests,
32
 
    )
 
29
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
 
30
 
 
31
 
 
32
def load_tests(standard_tests, module, loader):
 
33
    """Parameterize tests for all versions of groupcompress."""
 
34
    to_adapt, result = tests.split_suite_by_condition(
 
35
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
 
36
    scenarios = [
 
37
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
 
38
        ]
 
39
    if CompiledGroupCompressFeature.available():
 
40
        scenarios.append(('C',
 
41
            {'compressor': groupcompress.PyrexGroupCompressor}))
 
42
    return tests.multiply_tests(to_adapt, scenarios, result)
33
43
 
34
44
 
35
45
class TestGroupCompressor(tests.TestCase):
 
46
 
 
47
    def _chunks_to_repr_lines(self, chunks):
 
48
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
 
49
 
 
50
    def assertEqualDiffEncoded(self, expected, actual):
 
51
        """Compare the actual content to the expected content.
 
52
 
 
53
        :param expected: A group of chunks that we expect to see
 
54
        :param actual: The measured 'chunks'
 
55
 
 
56
        We will transform the chunks back into lines, and then run 'repr()'
 
57
        over them to handle non-ascii characters.
 
58
        """
 
59
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
 
60
                             self._chunks_to_repr_lines(actual))
 
61
 
 
62
 
 
63
class TestAllGroupCompressors(TestGroupCompressor):
36
64
    """Tests for GroupCompressor"""
37
65
 
 
66
    compressor = None # Set by multiply_tests
 
67
 
38
68
    def test_empty_delta(self):
39
 
        compressor = groupcompress.GroupCompressor()
40
 
        self.assertEqual([], compressor.lines)
 
69
        compressor = self.compressor()
 
70
        self.assertEqual([], compressor.chunks)
41
71
 
42
72
    def test_one_nosha_delta(self):
43
73
        # diff against NULL
44
 
        compressor = groupcompress.GroupCompressor()
 
74
        compressor = self.compressor()
45
75
        sha1, start_point, end_point, _, _ = compressor.compress(('label',),
46
76
            'strange\ncommon\n', None)
47
77
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
48
 
        expected_lines = [
49
 
            'f', '\x0f', 'strange\ncommon\n',
50
 
            ]
51
 
        self.assertEqual(expected_lines, compressor.lines)
 
78
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
 
79
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
52
80
        self.assertEqual(0, start_point)
53
81
        self.assertEqual(sum(map(len, expected_lines)), end_point)
54
82
 
55
83
    def test_empty_content(self):
56
 
        compressor = groupcompress.GroupCompressor()
 
84
        compressor = self.compressor()
57
85
        # Adding empty bytes should return the 'null' record
58
86
        sha1, start_point, end_point, kind, _ = compressor.compress(('empty',),
59
87
            '', None)
62
90
        self.assertEqual('fulltext', kind)
63
91
        self.assertEqual(groupcompress._null_sha1, sha1)
64
92
        self.assertEqual(0, compressor.endpoint)
65
 
        self.assertEqual([], compressor.lines)
 
93
        self.assertEqual([], compressor.chunks)
66
94
        # Even after adding some content
67
95
        compressor.compress(('content',), 'some\nbytes\n', None)
68
96
        self.assertTrue(compressor.endpoint > 0)
73
101
        self.assertEqual('fulltext', kind)
74
102
        self.assertEqual(groupcompress._null_sha1, sha1)
75
103
 
76
 
    def _chunks_to_repr_lines(self, chunks):
77
 
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
78
 
 
79
 
    def assertEqualDiffEncoded(self, expected, actual):
80
 
        """Compare the actual content to the expected content.
81
 
 
82
 
        :param expected: A group of chunks that we expect to see
83
 
        :param actual: The measured 'chunks'
84
 
 
85
 
        We will transform the chunks back into lines, and then run 'repr()'
86
 
        over them to handle non-ascii characters.
87
 
        """
88
 
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
89
 
                             self._chunks_to_repr_lines(actual))
90
 
 
91
 
    def test_two_nosha_delta(self):
92
 
        compressor = groupcompress.GroupCompressor()
93
 
        sha1_1, _, _, _, _ = compressor.compress(('label',),
94
 
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
95
 
        expected_lines = list(compressor.lines)
96
 
        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
97
 
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
98
 
        self.assertEqual(sha_string('common long line\n'
99
 
                                    'that needs a 16 byte match\n'
100
 
                                    'different\n'), sha1_2)
101
 
        expected_lines.extend([
102
 
            # 'delta', delta length
103
 
            'd\x10',
104
 
            # source and target length
105
 
            '\x36\x36',
106
 
            # copy the line common
107
 
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
108
 
            # add the line different, and the trailing newline
109
 
            '\x0adifferent\n', # insert 10 bytes
110
 
            ])
111
 
        self.assertEqualDiffEncoded(expected_lines, compressor.lines)
112
 
        self.assertEqual(sum(map(len, expected_lines)), end_point)
113
 
 
114
 
    def test_three_nosha_delta(self):
115
 
        # The first interesting test: make a change that should use lines from
116
 
        # both parents.
117
 
        compressor = groupcompress.GroupCompressor()
118
 
        sha1_1, _, _, _, _ = compressor.compress(('label',),
119
 
            'strange\ncommon very very long line\nwith some extra text\n', None)
120
 
        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
121
 
            'different\nmoredifferent\nand then some more\n', None)
122
 
        expected_lines = list(compressor.lines)
123
 
        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
124
 
            'new\ncommon very very long line\nwith some extra text\n'
125
 
            'different\nmoredifferent\nand then some more\n',
126
 
            None)
127
 
        self.assertEqual(
128
 
            sha_string('new\ncommon very very long line\nwith some extra text\n'
129
 
                       'different\nmoredifferent\nand then some more\n'),
130
 
            sha1_3)
131
 
        expected_lines.extend([
132
 
            # 'delta', delta length
133
 
            'd\x0c',
134
 
            # source and target length
135
 
            '\x67\x5f'
136
 
            # insert new
137
 
            '\x03new',
138
 
            # Copy of first parent 'common' range
139
 
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
140
 
            # Copy of second parent 'different' range
141
 
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
142
 
            ])
143
 
        self.assertEqualDiffEncoded(expected_lines, compressor.lines)
144
 
        self.assertEqual(sum(map(len, expected_lines)), end_point)
145
 
 
146
 
    def test_stats(self):
147
 
        compressor = groupcompress.GroupCompressor()
148
 
        compressor.compress(('label',), 'strange\ncommon long line\n'
149
 
                                        'plus more text\n', None)
150
 
        compressor.compress(('newlabel',),
151
 
                            'common long line\nplus more text\n'
152
 
                            'different\nmoredifferent\n', None)
153
 
        compressor.compress(('label3',),
154
 
                            'new\ncommon long line\nplus more text\n'
155
 
                            '\ndifferent\nmoredifferent\n', None)
156
 
        self.assertAlmostEqual(1.4, compressor.ratio(), 1)
157
 
 
158
104
    def test_extract_from_compressor(self):
159
105
        # Knit fetching will try to reconstruct texts locally which results in
160
106
        # reading something that is in the compressor stream already.
161
 
        compressor = groupcompress.GroupCompressor()
 
107
        compressor = self.compressor()
162
108
        sha1_1, _, _, _, _ = compressor.compress(('label',),
163
109
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
164
 
        expected_lines = list(compressor.lines)
 
110
        expected_lines = list(compressor.chunks)
165
111
        sha1_2, _, end_point, _, _ = compressor.compress(('newlabel',),
166
112
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
167
113
        # get the first out
168
114
        self.assertEqual(('strange\ncommon long line\n'
169
115
                          'that needs a 16 byte match\n', sha1_1),
170
 
            compressor.extract(('label',)))
 
116
                         compressor.extract(('label',)))
171
117
        # and the second
172
118
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
173
119
                          'different\n', sha1_2),
174
120
                         compressor.extract(('newlabel',)))
175
121
 
176
122
 
177
 
class TestBase128Int(tests.TestCase):
178
 
 
179
 
    def assertEqualEncode(self, bytes, val):
180
 
        self.assertEqual(bytes, groupcompress.encode_base128_int(val))
181
 
 
182
 
    def assertEqualDecode(self, val, num_decode, bytes):
183
 
        self.assertEqual((val, num_decode),
184
 
                         groupcompress.decode_base128_int(bytes))
185
 
 
186
 
    def test_encode(self):
187
 
        self.assertEqualEncode('\x01', 1)
188
 
        self.assertEqualEncode('\x02', 2)
189
 
        self.assertEqualEncode('\x7f', 127)
190
 
        self.assertEqualEncode('\x80\x01', 128)
191
 
        self.assertEqualEncode('\xff\x01', 255)
192
 
        self.assertEqualEncode('\x80\x02', 256)
193
 
        self.assertEqualEncode('\xff\xff\xff\xff\x0f', 0xFFFFFFFF)
194
 
 
195
 
    def test_decode(self):
196
 
        self.assertEqualDecode(1, 1, '\x01')
197
 
        self.assertEqualDecode(2, 1, '\x02')
198
 
        self.assertEqualDecode(127, 1, '\x7f')
199
 
        self.assertEqualDecode(128, 2, '\x80\x01')
200
 
        self.assertEqualDecode(255, 2, '\xff\x01')
201
 
        self.assertEqualDecode(256, 2, '\x80\x02')
202
 
        self.assertEqualDecode(0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f')
203
 
 
204
 
    def test_decode_with_trailing_bytes(self):
205
 
        self.assertEqualDecode(1, 1, '\x01abcdef')
206
 
        self.assertEqualDecode(127, 1, '\x7f\x01')
207
 
        self.assertEqualDecode(128, 2, '\x80\x01abcdef')
208
 
        self.assertEqualDecode(255, 2, '\xff\x01\xff')
 
123
class TestPyrexGroupCompressor(TestGroupCompressor):
 
124
 
 
125
    _test_needs_features = [CompiledGroupCompressFeature]
 
126
    compressor = groupcompress.PyrexGroupCompressor
 
127
 
 
128
    def test_stats(self):
 
129
        compressor = self.compressor()
 
130
        compressor.compress(('label',),
 
131
                            'strange\n'
 
132
                            'common very very long line\n'
 
133
                            'plus more text\n', None)
 
134
        compressor.compress(('newlabel',),
 
135
                            'common very very long line\n'
 
136
                            'plus more text\n'
 
137
                            'different\n'
 
138
                            'moredifferent\n', None)
 
139
        compressor.compress(('label3',),
 
140
                            'new\n'
 
141
                            'common very very long line\n'
 
142
                            'plus more text\n'
 
143
                            'different\n'
 
144
                            'moredifferent\n', None)
 
145
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
 
146
 
 
147
    def test_two_nosha_delta(self):
 
148
        compressor = self.compressor()
 
149
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
150
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
151
        expected_lines = list(compressor.chunks)
 
152
        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
 
153
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
154
        self.assertEqual(sha_string('common long line\n'
 
155
                                    'that needs a 16 byte match\n'
 
156
                                    'different\n'), sha1_2)
 
157
        expected_lines.extend([
 
158
            # 'delta', delta length
 
159
            'd\x0f',
 
160
            # target length
 
161
            '\x36',
 
162
            # copy the line common
 
163
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
 
164
            # add the line different, and the trailing newline
 
165
            '\x0adifferent\n', # insert 10 bytes
 
166
            ])
 
167
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
 
168
        self.assertEqual(sum(map(len, expected_lines)), end_point)
 
169
 
 
170
    def test_three_nosha_delta(self):
 
171
        # The first interesting test: make a change that should use lines from
 
172
        # both parents.
 
173
        compressor = self.compressor()
 
174
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
175
            'strange\ncommon very very long line\nwith some extra text\n', None)
 
176
        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
 
177
            'different\nmoredifferent\nand then some more\n', None)
 
178
        expected_lines = list(compressor.chunks)
 
179
        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
 
180
            'new\ncommon very very long line\nwith some extra text\n'
 
181
            'different\nmoredifferent\nand then some more\n',
 
182
            None)
 
183
        self.assertEqual(
 
184
            sha_string('new\ncommon very very long line\nwith some extra text\n'
 
185
                       'different\nmoredifferent\nand then some more\n'),
 
186
            sha1_3)
 
187
        expected_lines.extend([
 
188
            # 'delta', delta length
 
189
            'd\x0b',
 
190
            # target length
 
191
            '\x5f'
 
192
            # insert new
 
193
            '\x03new',
 
194
            # Copy of first parent 'common' range
 
195
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
 
196
            # Copy of second parent 'different' range
 
197
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
 
198
            ])
 
199
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
 
200
        self.assertEqual(sum(map(len, expected_lines)), end_point)
 
201
 
 
202
 
 
203
class TestPythonGroupCompressor(TestGroupCompressor):
 
204
 
 
205
    compressor = groupcompress.PythonGroupCompressor
 
206
 
 
207
    def test_stats(self):
 
208
        compressor = self.compressor()
 
209
        compressor.compress(('label',),
 
210
                            'strange\n'
 
211
                            'common very very long line\n'
 
212
                            'plus more text\n', None)
 
213
        compressor.compress(('newlabel',),
 
214
                            'common very very long line\n'
 
215
                            'plus more text\n'
 
216
                            'different\n'
 
217
                            'moredifferent\n', None)
 
218
        compressor.compress(('label3',),
 
219
                            'new\n'
 
220
                            'common very very long line\n'
 
221
                            'plus more text\n'
 
222
                            'different\n'
 
223
                            'moredifferent\n', None)
 
224
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
 
225
 
 
226
    def test_two_nosha_delta(self):
 
227
        compressor = self.compressor()
 
228
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
229
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
230
        expected_lines = list(compressor.chunks)
 
231
        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
 
232
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
233
        self.assertEqual(sha_string('common long line\n'
 
234
                                    'that needs a 16 byte match\n'
 
235
                                    'different\n'), sha1_2)
 
236
        expected_lines.extend([
 
237
            # 'delta', delta length
 
238
            'd\x0f',
 
239
            # target length
 
240
            '\x36',
 
241
            # copy the line common
 
242
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
 
243
            # add the line different, and the trailing newline
 
244
            '\x0adifferent\n', # insert 10 bytes
 
245
            ])
 
246
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
 
247
        self.assertEqual(sum(map(len, expected_lines)), end_point)
 
248
 
 
249
    def test_three_nosha_delta(self):
 
250
        # The first interesting test: make a change that should use lines from
 
251
        # both parents.
 
252
        compressor = self.compressor()
 
253
        sha1_1, _, _, _, _ = compressor.compress(('label',),
 
254
            'strange\ncommon very very long line\nwith some extra text\n', None)
 
255
        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
 
256
            'different\nmoredifferent\nand then some more\n', None)
 
257
        expected_lines = list(compressor.chunks)
 
258
        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
 
259
            'new\ncommon very very long line\nwith some extra text\n'
 
260
            'different\nmoredifferent\nand then some more\n',
 
261
            None)
 
262
        self.assertEqual(
 
263
            sha_string('new\ncommon very very long line\nwith some extra text\n'
 
264
                       'different\nmoredifferent\nand then some more\n'),
 
265
            sha1_3)
 
266
        expected_lines.extend([
 
267
            # 'delta', delta length
 
268
            'd\x0c',
 
269
            # target length
 
270
            '\x5f'
 
271
            # insert new
 
272
            '\x04new\n',
 
273
            # Copy of first parent 'common' range
 
274
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
 
275
            # Copy of second parent 'different' range
 
276
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
 
277
            ])
 
278
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
 
279
        self.assertEqual(sum(map(len, expected_lines)), end_point)
209
280
 
210
281
 
211
282
class TestGroupCompressBlock(tests.TestCase):
216
287
        start = 0
217
288
        for key in sorted(key_to_text):
218
289
            compressor.compress(key, key_to_text[key], None)
 
290
        locs = dict((key, (start, end)) for key, (start, _, end, _)
 
291
                    in compressor.labels_deltas.iteritems())
219
292
        block = compressor.flush()
220
 
        entries = block._entries
 
293
        raw_bytes = block.to_bytes()
221
294
        # Go through from_bytes(to_bytes()) so that we start with a compressed
222
295
        # content object
223
 
        return entries, groupcompress.GroupCompressBlock.from_bytes(
224
 
            block.to_bytes())
 
296
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
225
297
 
226
298
    def test_from_empty_bytes(self):
227
299
        self.assertRaises(ValueError,
229
301
 
230
302
    def test_from_minimal_bytes(self):
231
303
        block = groupcompress.GroupCompressBlock.from_bytes(
232
 
            'gcb1z\n0\n0\n0\n0\n')
 
304
            'gcb1z\n0\n0\n')
233
305
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
234
 
        self.assertEqual({}, block._entries)
235
306
        self.assertIs(None, block._content)
236
307
        self.assertEqual('', block._z_content)
237
308
        block._ensure_content()
239
310
        self.assertEqual('', block._z_content)
240
311
        block._ensure_content() # Ensure content is safe to call 2x
241
312
 
242
 
    def test_from_bytes_with_labels(self):
243
 
        header = ('key:bing\n'
244
 
            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
245
 
            'type:fulltext\n'
246
 
            'start:100\n'
247
 
            'length:100\n'
248
 
            '\n'
249
 
            'key:foo\x00bar\n'
250
 
            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
251
 
            'type:fulltext\n'
252
 
            'start:0\n'
253
 
            'length:100\n'
254
 
            '\n')
255
 
        z_header = zlib.compress(header)
 
313
    def test_from_bytes(self):
256
314
        content = ('a tiny bit of content\n')
257
315
        z_content = zlib.compress(content)
258
316
        z_bytes = (
259
317
            'gcb1z\n' # group compress block v1 zlib
260
 
            '%d\n' # Length of zlib bytes
261
 
            '%d\n' # Length of all meta-info
262
318
            '%d\n' # Length of compressed content
263
319
            '%d\n' # Length of uncompressed content
264
 
            '%s'   # Compressed header
265
320
            '%s'   # Compressed content
266
 
            ) % (len(z_header), len(header),
267
 
                 len(z_content), len(content),
268
 
                 z_header, z_content)
 
321
            ) % (len(z_content), len(content), z_content)
269
322
        block = groupcompress.GroupCompressBlock.from_bytes(
270
323
            z_bytes)
271
 
        block._parse_header()
272
 
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
273
 
        self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
274
 
        bing = block._entries[('bing',)]
275
 
        self.assertEqual(('bing',), bing.key)
276
 
        self.assertEqual('fulltext', bing.type)
277
 
        self.assertEqual('abcd'*10, bing.sha1)
278
 
        self.assertEqual(100, bing.start)
279
 
        self.assertEqual(100, bing.length)
280
 
        foobar = block._entries[('foo', 'bar')]
281
 
        self.assertEqual(('foo', 'bar'), foobar.key)
282
 
        self.assertEqual('fulltext', foobar.type)
283
 
        self.assertEqual('abcd'*10, foobar.sha1)
284
 
        self.assertEqual(0, foobar.start)
285
 
        self.assertEqual(100, foobar.length)
286
324
        self.assertEqual(z_content, block._z_content)
287
325
        self.assertIs(None, block._content)
288
 
        block._ensure_content()
289
 
        self.assertEqual(z_content, block._z_content)
290
 
        self.assertEqual(content, block._content)
291
 
 
292
 
    def test_from_old_bytes(self):
293
 
        # Backwards compatibility, with groups that didn't define content length
294
 
        content = ('a tiny bit of content\n')
295
 
        z_content = zlib.compress(content)
296
 
        z_bytes = (
297
 
            'gcb1z\n' # group compress block v1 plain
298
 
            '0\n' # Length of zlib bytes
299
 
            '0\n' # Length of all meta-info
300
 
            ''    # Compressed header
301
 
            '%s'   # Compressed content
302
 
            ) % (z_content)
303
 
        block = groupcompress.GroupCompressBlock.from_bytes(
304
 
            z_bytes)
305
 
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
306
 
        block._ensure_content()
307
 
        self.assertEqual(z_content, block._z_content)
308
 
        self.assertEqual(content, block._content)
309
 
 
310
 
    def test_add_entry(self):
311
 
        gcb = groupcompress.GroupCompressBlock()
312
 
        e = gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
313
 
        self.assertIsInstance(e, groupcompress.GroupCompressBlockEntry)
314
 
        self.assertEqual(('foo', 'bar'), e.key)
315
 
        self.assertEqual('fulltext', e.type)
316
 
        self.assertEqual('abcd'*10, e.sha1)
317
 
        self.assertEqual(0, e.start)
318
 
        self.assertEqual(100, e.length)
 
326
        self.assertEqual(len(z_content), block._z_content_length)
 
327
        self.assertEqual(len(content), block._content_length)
 
328
        block._ensure_content()
 
329
        self.assertEqual(z_content, block._z_content)
 
330
        self.assertEqual(content, block._content)
319
331
 
320
332
    def test_to_bytes(self):
321
 
        no_labels = groupcompress._NO_LABELS
322
 
        def reset():
323
 
            groupcompress._NO_LABELS = no_labels
324
 
        self.addCleanup(reset)
325
 
        groupcompress._NO_LABELS = False
 
333
        content = ('this is some content\n'
 
334
                   'this content will be compressed\n')
326
335
        gcb = groupcompress.GroupCompressBlock()
327
 
        gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
328
 
        gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
329
 
        gcb.set_content('this is some content\n'
330
 
                        'this content will be compressed\n')
 
336
        gcb.set_content(content)
331
337
        bytes = gcb.to_bytes()
 
338
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
 
339
        self.assertEqual(gcb._content_length, len(content))
332
340
        expected_header =('gcb1z\n' # group compress block v1 zlib
333
 
                          '76\n' # Length of compressed bytes
334
 
                          '183\n' # Length of uncompressed meta-info
335
 
                          '50\n' # Length of compressed content
336
 
                          '53\n' # Length of uncompressed content
337
 
                         )
 
341
                          '%d\n' # Length of compressed content
 
342
                          '%d\n' # Length of uncompressed content
 
343
                         ) % (gcb._z_content_length, gcb._content_length)
338
344
        self.assertStartsWith(bytes, expected_header)
339
345
        remaining_bytes = bytes[len(expected_header):]
340
346
        raw_bytes = zlib.decompress(remaining_bytes)
341
 
        self.assertEqualDiff('key:bing\n'
342
 
                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
343
 
                             'type:fulltext\n'
344
 
                             'start:100\n'
345
 
                             'length:100\n'
346
 
                             '\n'
347
 
                             'key:foo\x00bar\n'
348
 
                             'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
349
 
                             'type:fulltext\n'
350
 
                             'start:0\n'
351
 
                             'length:100\n'
352
 
                             '\n', raw_bytes)
 
347
        self.assertEqual(content, raw_bytes)
353
348
 
354
349
    def test_partial_decomp(self):
355
350
        content_chunks = []
595
590
        ('key3',): "yet another text which won't be extracted\n"
596
591
                   "with a reasonable amount of compressible bytes\n",
597
592
        ('key4',): "this will be extracted\n"
598
 
                   "but references bytes from\n"
 
593
                   "but references most of its bytes from\n"
599
594
                   "yet another text which won't be extracted\n"
600
595
                   "with a reasonable amount of compressible bytes\n",
601
596
    }
605
600
        start = 0
606
601
        for key in sorted(key_to_text):
607
602
            compressor.compress(key, key_to_text[key], None)
 
603
        locs = dict((key, (start, end)) for key, (start, _, end, _)
 
604
                    in compressor.labels_deltas.iteritems())
608
605
        block = compressor.flush()
609
 
        entries = block._entries
610
606
        raw_bytes = block.to_bytes()
611
 
        return entries, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
 
607
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
612
608
 
613
 
    def add_key_to_manager(self, key, entries, block, manager):
614
 
        entry = entries[key]
615
 
        manager.add_factory(entry.key, (), entry.start, entry.end)
 
609
    def add_key_to_manager(self, key, locations, block, manager):
 
610
        start, end = locations[key]
 
611
        manager.add_factory(key, (), start, end)
616
612
 
617
613
    def test_get_fulltexts(self):
618
 
        entries, block = self.make_block(self._texts)
 
614
        locations, block = self.make_block(self._texts)
619
615
        manager = groupcompress._LazyGroupContentManager(block)
620
 
        self.add_key_to_manager(('key1',), entries, block, manager)
621
 
        self.add_key_to_manager(('key2',), entries, block, manager)
 
616
        self.add_key_to_manager(('key1',), locations, block, manager)
 
617
        self.add_key_to_manager(('key2',), locations, block, manager)
622
618
        result_order = []
623
619
        for record in manager.get_record_stream():
624
620
            result_order.append(record.key)
629
625
        # If we build the manager in the opposite order, we should get them
630
626
        # back in the opposite order
631
627
        manager = groupcompress._LazyGroupContentManager(block)
632
 
        self.add_key_to_manager(('key2',), entries, block, manager)
633
 
        self.add_key_to_manager(('key1',), entries, block, manager)
 
628
        self.add_key_to_manager(('key2',), locations, block, manager)
 
629
        self.add_key_to_manager(('key1',), locations, block, manager)
634
630
        result_order = []
635
631
        for record in manager.get_record_stream():
636
632
            result_order.append(record.key)
639
635
        self.assertEqual([('key2',), ('key1',)], result_order)
640
636
 
641
637
    def test__wire_bytes_no_keys(self):
642
 
        entries, block = self.make_block(self._texts)
 
638
        locations, block = self.make_block(self._texts)
643
639
        manager = groupcompress._LazyGroupContentManager(block)
644
640
        wire_bytes = manager._wire_bytes()
645
641
        block_length = len(block.to_bytes())
658
654
                         wire_bytes)
659
655
 
660
656
    def test__wire_bytes(self):
661
 
        entries, block = self.make_block(self._texts)
 
657
        locations, block = self.make_block(self._texts)
662
658
        manager = groupcompress._LazyGroupContentManager(block)
663
 
        self.add_key_to_manager(('key1',), entries, block, manager)
664
 
        self.add_key_to_manager(('key4',), entries, block, manager)
 
659
        self.add_key_to_manager(('key1',), locations, block, manager)
 
660
        self.add_key_to_manager(('key4',), locations, block, manager)
665
661
        block_bytes = block.to_bytes()
666
662
        wire_bytes = manager._wire_bytes()
667
663
        (storage_kind, z_header_len, header_len,
676
672
        z_header = rest[:z_header_len]
677
673
        header = zlib.decompress(z_header)
678
674
        self.assertEqual(header_len, len(header))
679
 
        entry1 = entries[('key1',)]
680
 
        entry4 = entries[('key4',)]
 
675
        entry1 = locations[('key1',)]
 
676
        entry4 = locations[('key4',)]
681
677
        self.assertEqualDiff('key1\n'
682
678
                             '\n'  # no parents
683
679
                             '%d\n' # start offset
684
 
                             '%d\n' # end byte
 
680
                             '%d\n' # end offset
685
681
                             'key4\n'
686
682
                             '\n'
687
683
                             '%d\n'
688
684
                             '%d\n'
689
 
                             % (entry1.start, entry1.end,
690
 
                                entry4.start, entry4.end),
 
685
                             % (entry1[0], entry1[1],
 
686
                                entry4[0], entry4[1]),
691
687
                            header)
692
688
        z_block = rest[z_header_len:]
693
689
        self.assertEqual(block_bytes, z_block)
694
690
 
695
691
    def test_from_bytes(self):
696
 
        entries, block = self.make_block(self._texts)
 
692
        locations, block = self.make_block(self._texts)
697
693
        manager = groupcompress._LazyGroupContentManager(block)
698
 
        self.add_key_to_manager(('key1',), entries, block, manager)
699
 
        self.add_key_to_manager(('key4',), entries, block, manager)
 
694
        self.add_key_to_manager(('key1',), locations, block, manager)
 
695
        self.add_key_to_manager(('key4',), locations, block, manager)
700
696
        wire_bytes = manager._wire_bytes()
701
697
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
702
698
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
711
707
        self.assertEqual([('key1',), ('key4',)], result_order)
712
708
 
713
709
    def test__check_rebuild_no_changes(self):
714
 
        entries, block = self.make_block(self._texts)
 
710
        locations, block = self.make_block(self._texts)
715
711
        manager = groupcompress._LazyGroupContentManager(block)
716
712
        # Request all the keys, which ensures that we won't rebuild
717
 
        self.add_key_to_manager(('key1',), entries, block, manager)
718
 
        self.add_key_to_manager(('key2',), entries, block, manager)
719
 
        self.add_key_to_manager(('key3',), entries, block, manager)
720
 
        self.add_key_to_manager(('key4',), entries, block, manager)
 
713
        self.add_key_to_manager(('key1',), locations, block, manager)
 
714
        self.add_key_to_manager(('key2',), locations, block, manager)
 
715
        self.add_key_to_manager(('key3',), locations, block, manager)
 
716
        self.add_key_to_manager(('key4',), locations, block, manager)
721
717
        manager._check_rebuild_block()
722
718
        self.assertIs(block, manager._block)
723
719
 
724
720
    def test__check_rebuild_only_one(self):
725
 
        entries, block = self.make_block(self._texts)
 
721
        locations, block = self.make_block(self._texts)
726
722
        manager = groupcompress._LazyGroupContentManager(block)
727
723
        # Request just the first key, which should trigger a 'strip' action
728
 
        self.add_key_to_manager(('key1',), entries, block, manager)
 
724
        self.add_key_to_manager(('key1',), locations, block, manager)
729
725
        manager._check_rebuild_block()
730
726
        self.assertIsNot(block, manager._block)
731
727
        self.assertTrue(block._content_length > manager._block._content_length)
737
733
                             record.get_bytes_as('fulltext'))
738
734
 
739
735
    def test__check_rebuild_middle(self):
740
 
        entries, block = self.make_block(self._texts)
 
736
        locations, block = self.make_block(self._texts)
741
737
        manager = groupcompress._LazyGroupContentManager(block)
742
738
        # Requesting a small key in the middle should trigger a 'rebuild'
743
 
        self.add_key_to_manager(('key4',), entries, block, manager)
 
739
        self.add_key_to_manager(('key4',), locations, block, manager)
744
740
        manager._check_rebuild_block()
745
741
        self.assertIsNot(block, manager._block)
746
742
        self.assertTrue(block._content_length > manager._block._content_length)