~bzr-pqm/bzr/bzr.dev

3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
1
# Copyright (C) 2008, 2009 Canonical Ltd
2
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
3
# This program is free software; you can redistribute it and/or modify
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.3 by John Arbash Meinel
Add the new address for FSF to the new files.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
16
17
"""Tests for group compression."""
18
19
import zlib
20
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
21
from bzrlib import (
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
22
    btree_index,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
23
    groupcompress,
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
24
    errors,
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
25
    index as _mod_index,
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
26
    osutils,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
27
    tests,
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
28
    trace,
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
29
    versionedfile,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
30
    )
0.23.58 by John Arbash Meinel
fix up the failing tests.
31
from bzrlib.osutils import sha_string
4913.2.24 by John Arbash Meinel
Track down a few more import typos.
32
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
33
34
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
35
def load_tests(standard_tests, module, loader):
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
36
    """Parameterize tests for all versions of groupcompress."""
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
37
    to_adapt, result = tests.split_suite_by_condition(
38
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
39
    scenarios = [
40
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
41
        ]
4913.2.24 by John Arbash Meinel
Track down a few more import typos.
42
    if compiled_groupcompress_feature.available():
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
43
        scenarios.append(('C',
44
            {'compressor': groupcompress.PyrexGroupCompressor}))
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
45
    return tests.multiply_tests(to_adapt, scenarios, result)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
46
47
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
48
class TestGroupCompressor(tests.TestCase):
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
49
50
    def _chunks_to_repr_lines(self, chunks):
51
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
52
53
    def assertEqualDiffEncoded(self, expected, actual):
54
        """Compare the actual content to the expected content.
55
56
        :param expected: A group of chunks that we expect to see
57
        :param actual: The measured 'chunks'
58
59
        We will transform the chunks back into lines, and then run 'repr()'
60
        over them to handle non-ascii characters.
61
        """
62
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
63
                             self._chunks_to_repr_lines(actual))
64
65
66
class TestAllGroupCompressors(TestGroupCompressor):
0.17.2 by Robert Collins
Core proof of concept working.
67
    """Tests for GroupCompressor"""
68
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
69
    compressor = None # Set by multiply_tests
70
0.17.2 by Robert Collins
Core proof of concept working.
71
    def test_empty_delta(self):
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
72
        compressor = self.compressor()
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
73
        self.assertEqual([], compressor.chunks)
0.17.2 by Robert Collins
Core proof of concept working.
74
75
    def test_one_nosha_delta(self):
76
        # diff against NUKK
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
77
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
78
        sha1, start_point, end_point, _ = compressor.compress(('label',),
0.23.58 by John Arbash Meinel
fix up the failing tests.
79
            'strange\ncommon\n', None)
80
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
81
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
82
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
83
        self.assertEqual(0, start_point)
0.17.2 by Robert Collins
Core proof of concept working.
84
        self.assertEqual(sum(map(len, expected_lines)), end_point)
85
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
86
    def test_empty_content(self):
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
87
        compressor = self.compressor()
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
88
        # Adding empty bytes should return the 'null' record
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
89
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
90
                                                                 '', None)
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
91
        self.assertEqual(0, start_point)
92
        self.assertEqual(0, end_point)
93
        self.assertEqual('fulltext', kind)
94
        self.assertEqual(groupcompress._null_sha1, sha1)
95
        self.assertEqual(0, compressor.endpoint)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
96
        self.assertEqual([], compressor.chunks)
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
97
        # Even after adding some content
98
        compressor.compress(('content',), 'some\nbytes\n', None)
99
        self.assertTrue(compressor.endpoint > 0)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
100
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
101
                                                                 '', None)
3735.2.162 by John Arbash Meinel
Change GroupCompressor.compress() to return the start_point.
102
        self.assertEqual(0, start_point)
103
        self.assertEqual(0, end_point)
104
        self.assertEqual('fulltext', kind)
105
        self.assertEqual(groupcompress._null_sha1, sha1)
106
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
107
    def test_extract_from_compressor(self):
108
        # Knit fetching will try to reconstruct texts locally which results in
109
        # reading something that is in the compressor stream already.
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
110
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
111
        sha1_1, _, _, _ = compressor.compress(('label',),
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
112
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
113
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
114
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
115
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
116
        # get the first out
0.25.8 by John Arbash Meinel
Fix up the tests. Mostly it was just changing things to
117
        self.assertEqual(('strange\ncommon long line\n'
118
                          'that needs a 16 byte match\n', sha1_1),
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
119
                         compressor.extract(('label',)))
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
120
        # and the second
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
121
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
122
                          'different\n', sha1_2),
123
                         compressor.extract(('newlabel',)))
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
124
4241.17.2 by John Arbash Meinel
PythonGroupCompressor needs to support pop_last() properly.
125
    def test_pop_last(self):
126
        compressor = self.compressor()
127
        _, _, _, _ = compressor.compress(('key1',),
128
            'some text\nfor the first entry\n', None)
129
        expected_lines = list(compressor.chunks)
130
        _, _, _, _ = compressor.compress(('key2',),
131
            'some text\nfor the second entry\n', None)
132
        compressor.pop_last()
133
        self.assertEqual(expected_lines, compressor.chunks)
134
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
135
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
136
class TestPyrexGroupCompressor(TestGroupCompressor):
137
4913.2.24 by John Arbash Meinel
Track down a few more import typos.
138
    _test_needs_features = [compiled_groupcompress_feature]
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
139
    compressor = groupcompress.PyrexGroupCompressor
140
141
    def test_stats(self):
142
        compressor = self.compressor()
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
143
        compressor.compress(('label',),
144
                            'strange\n'
145
                            'common very very long line\n'
146
                            'plus more text\n', None)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
147
        compressor.compress(('newlabel',),
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
148
                            'common very very long line\n'
149
                            'plus more text\n'
150
                            'different\n'
151
                            'moredifferent\n', None)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
152
        compressor.compress(('label3',),
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
153
                            'new\n'
154
                            'common very very long line\n'
155
                            'plus more text\n'
156
                            'different\n'
157
                            'moredifferent\n', None)
158
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
159
160
    def test_two_nosha_delta(self):
161
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
162
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
163
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
164
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
165
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
166
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
167
        self.assertEqual(sha_string('common long line\n'
168
                                    'that needs a 16 byte match\n'
169
                                    'different\n'), sha1_2)
170
        expected_lines.extend([
171
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
172
            'd\x0f',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
173
            # source and target length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
174
            '\x36',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
175
            # copy the line common
176
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
177
            # add the line different, and the trailing newline
178
            '\x0adifferent\n', # insert 10 bytes
179
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
180
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
181
        self.assertEqual(sum(map(len, expected_lines)), end_point)
182
183
    def test_three_nosha_delta(self):
184
        # The first interesting test: make a change that should use lines from
185
        # both parents.
186
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
187
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
188
            'strange\ncommon very very long line\nwith some extra text\n', None)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
189
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
190
            'different\nmoredifferent\nand then some more\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
191
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
192
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
193
            'new\ncommon very very long line\nwith some extra text\n'
194
            'different\nmoredifferent\nand then some more\n',
195
            None)
196
        self.assertEqual(
197
            sha_string('new\ncommon very very long line\nwith some extra text\n'
198
                       'different\nmoredifferent\nand then some more\n'),
199
            sha1_3)
200
        expected_lines.extend([
201
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
202
            'd\x0b',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
203
            # source and target length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
204
            '\x5f'
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
205
            # insert new
206
            '\x03new',
207
            # Copy of first parent 'common' range
208
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
209
            # Copy of second parent 'different' range
210
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
211
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
212
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
213
        self.assertEqual(sum(map(len, expected_lines)), end_point)
214
215
216
class TestPythonGroupCompressor(TestGroupCompressor):
217
218
    compressor = groupcompress.PythonGroupCompressor
219
220
    def test_stats(self):
221
        compressor = self.compressor()
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
222
        compressor.compress(('label',),
223
                            'strange\n'
224
                            'common very very long line\n'
225
                            'plus more text\n', None)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
226
        compressor.compress(('newlabel',),
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
227
                            'common very very long line\n'
228
                            'plus more text\n'
229
                            'different\n'
230
                            'moredifferent\n', None)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
231
        compressor.compress(('label3',),
3735.40.7 by John Arbash Meinel
Move even more functionality into EquivalenceTable.
232
                            'new\n'
233
                            'common very very long line\n'
234
                            'plus more text\n'
235
                            'different\n'
236
                            'moredifferent\n', None)
237
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
238
239
    def test_two_nosha_delta(self):
240
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
241
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
242
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
243
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
244
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
245
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
246
        self.assertEqual(sha_string('common long line\n'
247
                                    'that needs a 16 byte match\n'
248
                                    'different\n'), sha1_2)
249
        expected_lines.extend([
250
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
251
            'd\x0f',
252
            # target length
253
            '\x36',
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
254
            # copy the line common
255
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
256
            # add the line different, and the trailing newline
257
            '\x0adifferent\n', # insert 10 bytes
258
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
259
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
260
        self.assertEqual(sum(map(len, expected_lines)), end_point)
261
262
    def test_three_nosha_delta(self):
263
        # The first interesting test: make a change that should use lines from
264
        # both parents.
265
        compressor = self.compressor()
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
266
        sha1_1, _, _, _ = compressor.compress(('label',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
267
            'strange\ncommon very very long line\nwith some extra text\n', None)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
268
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
269
            'different\nmoredifferent\nand then some more\n', None)
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
270
        expected_lines = list(compressor.chunks)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
271
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
272
            'new\ncommon very very long line\nwith some extra text\n'
273
            'different\nmoredifferent\nand then some more\n',
274
            None)
275
        self.assertEqual(
276
            sha_string('new\ncommon very very long line\nwith some extra text\n'
277
                       'different\nmoredifferent\nand then some more\n'),
278
            sha1_3)
279
        expected_lines.extend([
280
            # 'delta', delta length
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
281
            'd\x0c',
282
            # target length
283
            '\x5f'
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
284
            # insert new
285
            '\x04new\n',
286
            # Copy of first parent 'common' range
287
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
288
            # Copy of second parent 'different' range
289
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
290
            ])
3735.40.17 by John Arbash Meinel
Change the attribute from 'lines' to 'chunks' to make it more
291
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
292
        self.assertEqual(sum(map(len, expected_lines)), end_point)
293
294
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
295
class TestGroupCompressBlock(tests.TestCase):
296
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
297
    def make_block(self, key_to_text):
298
        """Create a GroupCompressBlock, filling it with the given texts."""
299
        compressor = groupcompress.GroupCompressor()
300
        start = 0
301
        for key in sorted(key_to_text):
302
            compressor.compress(key, key_to_text[key], None)
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
303
        locs = dict((key, (start, end)) for key, (start, _, end, _)
304
                    in compressor.labels_deltas.iteritems())
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
305
        block = compressor.flush()
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
306
        raw_bytes = block.to_bytes()
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
307
        # Go through from_bytes(to_bytes()) so that we start with a compressed
308
        # content object
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
309
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
310
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
311
    def test_from_empty_bytes(self):
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
312
        self.assertRaises(ValueError,
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
313
                          groupcompress.GroupCompressBlock.from_bytes, '')
314
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
315
    def test_from_minimal_bytes(self):
3735.32.4 by John Arbash Meinel
Change the byte representation of a groupcompress block.
316
        block = groupcompress.GroupCompressBlock.from_bytes(
3735.38.4 by John Arbash Meinel
Another disk format change.
317
            'gcb1z\n0\n0\n')
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
318
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
3735.32.6 by John Arbash Meinel
A bit of reworking changes things so content is expanded at extract() time.
319
        self.assertIs(None, block._content)
320
        self.assertEqual('', block._z_content)
321
        block._ensure_content()
3735.32.5 by John Arbash Meinel
Change the parsing code to start out just holding the compressed bytes.
322
        self.assertEqual('', block._content)
3735.32.27 by John Arbash Meinel
Have _LazyGroupContentManager pre-extract everything it holds.
323
        self.assertEqual('', block._z_content)
3735.32.6 by John Arbash Meinel
A bit of reworking changes things so content is expanded at extract() time.
324
        block._ensure_content() # Ensure content is safe to call 2x
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
325
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
326
    def test_from_invalid(self):
327
        self.assertRaises(ValueError,
328
                          groupcompress.GroupCompressBlock.from_bytes,
329
                          'this is not a valid header')
330
3735.38.4 by John Arbash Meinel
Another disk format change.
331
    def test_from_bytes(self):
3735.32.4 by John Arbash Meinel
Change the byte representation of a groupcompress block.
332
        content = ('a tiny bit of content\n')
333
        z_content = zlib.compress(content)
334
        z_bytes = (
335
            'gcb1z\n' # group compress block v1 plain
336
            '%d\n' # Length of compressed content
337
            '%d\n' # Length of uncompressed content
338
            '%s'   # Compressed content
3735.38.4 by John Arbash Meinel
Another disk format change.
339
            ) % (len(z_content), len(content), z_content)
0.25.6 by John Arbash Meinel
(tests broken) implement the basic ability to have a separate header
340
        block = groupcompress.GroupCompressBlock.from_bytes(
3735.32.4 by John Arbash Meinel
Change the byte representation of a groupcompress block.
341
            z_bytes)
3735.32.6 by John Arbash Meinel
A bit of reworking changes things so content is expanded at extract() time.
342
        self.assertEqual(z_content, block._z_content)
343
        self.assertIs(None, block._content)
3735.38.4 by John Arbash Meinel
Another disk format change.
344
        self.assertEqual(len(z_content), block._z_content_length)
345
        self.assertEqual(len(content), block._content_length)
3735.32.10 by John Arbash Meinel
test that we support reading from the gc blocks that didn't have their lengths.
346
        block._ensure_content()
3735.32.27 by John Arbash Meinel
Have _LazyGroupContentManager pre-extract everything it holds.
347
        self.assertEqual(z_content, block._z_content)
3735.32.10 by John Arbash Meinel
test that we support reading from the gc blocks that didn't have their lengths.
348
        self.assertEqual(content, block._content)
349
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
350
    def test_to_bytes(self):
        """Serialising a block emits a v1-zlib header followed by the
        zlib-compressed content, and the chunked API round-trips the same.
        """
        text = ('this is some content\n'
                'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(text)
        serialised = gcb.to_bytes()
        # Serialising records both the compressed and raw lengths.
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(text))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(serialised, expected_header)
        # Everything after the header is the zlib payload of the raw text.
        remainder = serialised[len(expected_header):]
        self.assertEqual(text, zlib.decompress(remainder))

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(text))
        self.assertEqual(serialised, gcb.to_bytes())
375
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
376
    def test_partial_decomp(self):
        """_ensure_content decompresses incrementally, and only finalizes
        the decompressor once the full content has been extracted.
        """
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, so interleave compressible text with
        # sha1 digests.
        chunks = []
        for idx in xrange(2048):
            chunk = '%d\nThis is a bit of duplicate text\n' % (idx,)
            chunks.append(chunk)
            chunks.append(osutils.sha_string(chunk) + '\n')
        content = ''.join(chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # Asking for 100 bytes decompresses at least that much, but not
        # everything.
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        self.assertTrue(len(block._content) >= 100)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Asking for content we already have should not extract any more data.
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Asking for a bit more decompresses a bit more.
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # Asking for everything finishes the extraction ...
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # ... and releases the decompressor.
        self.assertIs(None, block._z_content_decompressor)
420
4744.2.3 by John Arbash Meinel
change the GroupcompressBlock code a bit.
421
    def test__ensure_all_content(self):
        """A single _ensure_content for the whole length extracts everything
        and drops the then fully-consumed decompressor.
        """
        # Same sha1-interleaved corpus as test_partial_decomp: large enough
        # that zlib has real decompression work to do, and not so regular
        # that it compresses away to nothing.
        parts = []
        for i in xrange(2048):
            text = '%d\nThis is a bit of duplicate text\n' % (i,)
            parts.append(text)
            parts.append(osutils.sha_string(text) + '\n')
        content = ''.join(parts)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content asks for all of the required data at once.
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # The _z_content_decompressor was fully consumed, so it should have
        # been released.
        self.assertIs(None, block._z_content_decompressor)
448
4300.1.1 by John Arbash Meinel
Add the ability to convert a gc block into 'human readable' form.
449
    def test__dump(self):
        """_dump renders the block's records in human-readable tuple form."""
        common = 'some duplicate content\nwhich is sufficiently long\n'
        texts = {('1',): common + '1 unique\n',
                 ('2',): common + '2 extra special\n'}
        locs, block = self.make_block(texts)
        # First record is a fulltext ('f'); the second is a delta ('d')
        # made of copy ('c') and insert ('i') instructions — presumably
        # copying the shared prefix and inserting the unique tail.
        expected = [
            ('f', len(texts[('1',)])),
            ('d', 21, len(texts[('2',)]),
             [('c', 2, len(common)),
              ('i', len('2 extra special\n'), ''),
             ]),
            ]
        self.assertEqual(expected, block._dump())
460
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
461
4744.2.5 by John Arbash Meinel
Change to a generic 'VersionedFiles.clear_cache()' api.
462
class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):
    """Base class providing a convenient groupcompress VF factory."""

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        """Create a pack-backed groupcompress VersionedFiles at *dir*.

        :param create_graph: whether the VF records a parent graph.
        :param do_cleanup: register cleanup_pack_group so the pack group is
            torn down at the end of the test.
        """
        transport = self.get_transport(dir)
        transport.ensure_base()
        factory = groupcompress.make_pack_factory(
            graph=create_graph, delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)
        vf = factory(transport)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf
475
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
476
477
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=None):
        """Build and reopen a BTreeGraphIndex named *name*.

        :param nodes: iterable of (key, references, value) tuples.  The
            default used to be a mutable ``[]`` literal; it is now None to
            avoid the shared-mutable-default pitfall (behaviour unchanged).
        """
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in (nodes or []):
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        """An index whose 'tip' node references a parent absent from it."""
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def _grouped_stream(self, revision_ids, first_parents=()):
        """Yield a linear chain of FulltextContentFactory records.

        Every text differs only in its revision_id line, and each record
        names the previous one as its parent.  (This was previously an
        identical nested generator copy-pasted in three tests.)
        """
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                'some content that is\n'
                'identical except for\n'
                'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        # a-d should all share one block, e-h the other.
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                # Mark each block as already "full enough" so the target vf
                # does not rebuild it on insert.
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        """Create a vf at *path* with keys 'a' and 'b'; 'b' has 'a' as a
        parent iff *a_parent* is true.
        """
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        """Insert the same key twice with differing details into a target
        vf created with the given inconsistency_fatal setting.
        """
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            # Second source gives 'b' a parent, so its details conflict with
            # the record already inserted from the first source.
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " in add_records:"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        # Reading records populates the group cache ...
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        # ... and clear_cache empties it.
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))
745
746
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
747
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
748
class StubGCVF(object):
    """A minimal stand-in for a GroupCompressVersionedFiles.

    It exposes only what _BatchingBlockFetcher touches: the ``_group_cache``
    dict and a ``_get_blocks`` that replays canned (read_memo, block) pairs.
    """

    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        """Ignore *read_memos*; just replay the canned results."""
        return iter(self._canned_get_blocks)
755
756
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """A key whose uncached read_memo is new to this batch queues that
        read_memo for fetching.
        """
        # locations maps key -> (index_memo, ignored, parents, ignored),
        # where index_memo is (idx, offset, len, factory_start, factory_end)
        # and its first three entries (idx, offset, size) form the
        # 'read_memo' identifying the raw bytes needed.
        memo = ('fake index', 100, 50)
        locations = {('key',): (memo + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        size = fetcher.add_key(('key',))
        self.assertEqual(50, size)
        self.assertEqual([('key',)], fetcher.keys)
        self.assertEqual([memo], fetcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """A read_memo occurring several times in a batch is fetched once."""
        memo = ('fake index', 100, 50)
        # Two keys sharing one read memo (their full index_memos differ).
        locations = {
            ('key1',): (memo + (0, 1), None, None, None),
            ('key2',): (memo + (1, 2), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        size = fetcher.add_key(('key1',))
        size = fetcher.add_key(('key2',))
        self.assertEqual(50, size)
        self.assertEqual([('key1',), ('key2',)], fetcher.keys)
        self.assertEqual([memo], fetcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """A key whose read_memo is already cached queues nothing to fetch."""
        memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[memo] = 'fake block'
        locations = {('key',): (memo + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        size = fetcher.add_key(('key',))
        self.assertEqual(0, size)
        self.assertEqual([('key',)], fetcher.keys)
        self.assertEqual([], fetcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(fetcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        memo1 = ('fake index', 100, 50)
        memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (memo1, groupcompress.GroupCompressBlock()),
                (memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (memo1 + (None, None), None, None, None),
            ('key2',): (memo2 + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        fetcher.add_key(('key1',))
        fetcher.add_key(('key2',))
        factories = list(fetcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        self.assertEqual([('key1',), ('key2',)],
                         [f.key for f in factories])
        self.assertEqual(['groupcompress-block', 'groupcompress-block'],
                         [f.storage_kind for f in factories])

    def test_yield_factories_flushing(self):
        """yield_factories holds back results from the final block unless
        called with full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[memo] = fake_block
        locations = {('key',): (memo + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        fetcher.add_key(('key',))
        self.assertEqual([], list(fetcher.yield_factories()))
        factories = list(fetcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
851
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
852
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
853
class TestLazyGroupCompress(tests.TestCaseWithTransport):
    """Tests for lazily extracting content from a GroupCompressBlock."""

    # Sample texts keyed by 1-tuples, as the groupcompress VF keys are.
    # The texts deliberately share whole lines so the compressor can
    # delta-compress them; key4's text repeats key3's lines, so extracting
    # key4 references bytes stored for key3.
    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
871
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: dict mapping key tuples to text strings.
        :return: (locs, block) where locs maps each key to its (start, end)
            offsets inside the block content, and block is a
            GroupCompressBlock rebuilt from its serialised bytes.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # labels_deltas values are 4-tuples; only the first and third items
        # (the content start/end offsets) are needed here.
        # (The original also assigned a dead 'start = 0' local, immediately
        # shadowed by the tuple unpacking below; it has been removed.)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        # Round-trip through the serialised form, as a real reader would.
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
882
3735.40.18 by John Arbash Meinel
Get rid of the entries dict in GroupCompressBlock.
883
    def add_key_to_manager(self, key, locations, block, manager):
        """Register key's (start, end) range from locations with manager.

        block is unused; it is accepted to keep the call sites uniform.
        """
        offsets = locations[key]
        manager.add_factory(key, (), offsets[0], offsets[1])
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
886
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
887
    def make_block_and_full_manager(self, texts):
        """Build a block from texts plus a manager holding every key."""
        locs, blk = self.make_block(texts)
        mgr = groupcompress._LazyGroupContentManager(blk)
        for k in sorted(texts):
            self.add_key_to_manager(k, locs, blk, mgr)
        return blk, mgr
893
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
894
    def test_get_fulltexts(self):
        """Records stream back in the order their keys were added."""
        locations, block = self.make_block(self._texts)

        def extract_in_order(key_order):
            # Build a fresh manager with the keys in the given order and
            # return the order the records actually came back in.
            manager = groupcompress._LazyGroupContentManager(block)
            for key in key_order:
                self.add_key_to_manager(key, locations, block, manager)
            seen = []
            for record in manager.get_record_stream():
                seen.append(record.key)
                self.assertEqual(self._texts[record.key],
                                 record.get_bytes_as('fulltext'))
            return seen

        self.assertEqual([('key1',), ('key2',)],
                         extract_in_order([('key1',), ('key2',)]))
        # Building the manager in the opposite order yields the records in
        # the opposite order too.
        self.assertEqual([('key2',), ('key1',)],
                         extract_in_order([('key2',), ('key1',)]))
917
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
918
    def test__wire_bytes_no_keys(self):
        """With no keys in use, _wire_bytes strips the block first."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # No content was requested, so serialising must have triggered a
        # strip of the unused block content.
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        expected = ('groupcompress-block\n'
                    '8\n'   # len(compress(''))
                    '0\n'   # len('')
                    + '%d\n' % (len(stripped_block),)  # compressed block len
                    + empty_z_header                   # zheader
                    + stripped_block)                  # block
        self.assertEqual(expected, wire_bytes)
3735.32.16 by John Arbash Meinel
We now have a general header for the GC block.
936
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
937
    def test__wire_bytes(self):
        """_wire_bytes serialises a header plus the block for the used keys."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        # Sizes of the compressed/uncompressed header for exactly these two
        # keys with these texts.
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        expected_header = ('key1\n'
                           '\n'  # no parents
                           '%d\n' # start offset
                           '%d\n' # end offset
                           'key4\n'
                           '\n'
                           '%d\n'
                           '%d\n'
                           % (entry1[0], entry1[1],
                              entry4[0], entry4[1]))
        self.assertEqualDiff(expected_header, header)
        # Everything after the header is the (unmodified) block itself.
        self.assertEqual(block_bytes, rest[z_header_len:])
971
972
    def test_from_bytes(self):
        """A manager round-trips through its wire representation."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        # Rebuild a manager purely from the serialised bytes.
        rebuilt = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(rebuilt, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(rebuilt._factories))
        self.assertEqual(block._z_content, rebuilt._block._z_content)
        result_order = []
        for record in rebuilt.get_record_stream():
            result_order.append(record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)
3735.32.23 by John Arbash Meinel
Add a _LazyGroupContentManager._check_rebuild_block
989
990
    def test__check_rebuild_no_changes(self):
        """A block whose content is fully used is left untouched."""
        blk, mgr = self.make_block_and_full_manager(self._texts)
        mgr._check_rebuild_block()
        # Every key is in use, so the very same block object must survive.
        self.assertIs(blk, mgr._block)
994
995
    def test__check_rebuild_only_one(self):
        """Using only the first key strips the block down to that content."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # The stripped block must still serve its single remaining record.
        for rec in manager.get_record_stream():
            self.assertEqual(('key1',), rec.key)
            self.assertEqual(self._texts[rec.key],
                             rec.get_bytes_as('fulltext'))
1009
1010
    def test__check_rebuild_middle(self):
        """Using one small key from mid-block shrinks the block."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # The rebuilt block still yields key4's full text.
        for rec in manager.get_record_stream():
            self.assertEqual(('key4',), rec.key)
            self.assertEqual(self._texts[rec.key],
                             rec.get_bytes_as('fulltext'))
4665.3.7 by John Arbash Meinel
We needed a bit more data to actually get groups doing delta-compression.
1022
1023
    def test_check_is_well_utilized_all_keys(self):
        """check_is_well_utilized hinges on the minimum-size thresholds."""
        blk, mgr = self.make_block_and_full_manager(self._texts)
        self.assertFalse(mgr.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        mgr._full_enough_block_size = blk._content_length
        self.assertTrue(mgr.check_is_well_utilized())
        # Setting it just above causes it to fail
        mgr._full_enough_block_size = blk._content_length + 1
        self.assertFalse(mgr.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the
        # content is considered to not be 'mixed'
        mgr._full_enough_mixed_block_size = blk._content_length
        self.assertFalse(mgr.check_is_well_utilized())
1036
1037
    def test_check_is_well_utilized_mixed_keys(self):
        """Keys with differing prefixes use the mixed-block threshold."""
        # Same four texts, but spread over two key prefixes ('f1' and 'f2').
        texts = {
            ('f1', 'k1'): self._texts[('key1',)],
            ('f1', 'k2'): self._texts[('key2',)],
            ('f2', 'k1'): self._texts[('key3',)],
            ('f2', 'k2'): self._texts[('key4',)],
            }
        blk, mgr = self.make_block_and_full_manager(texts)
        self.assertFalse(mgr.check_is_well_utilized())
        mgr._full_enough_block_size = blk._content_length
        self.assertTrue(mgr.check_is_well_utilized())
        mgr._full_enough_block_size = blk._content_length + 1
        self.assertFalse(mgr.check_is_well_utilized())
        # With mixed prefixes present, the mixed-block threshold now counts.
        mgr._full_enough_mixed_block_size = blk._content_length
        self.assertTrue(mgr.check_is_well_utilized())
1055
1056
    def test_check_is_well_utilized_partial_use(self):
        """Using only part of a block's content is not 'well utilized'."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4 (whose text repeats key3's lines), we have
        # enough, as we only require 75% consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())