~bzr-pqm/bzr/bzr.dev

4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
3
# This program is free software; you can redistribute it and/or modify
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.3 by John Arbash Meinel
Add the new address for FSF to the new files.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
16
17
"""Tests for group compression."""
18
19
import zlib
20
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
21
from bzrlib import (
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
22
    btree_index,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
23
    groupcompress,
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
24
    errors,
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
25
    index as _mod_index,
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
26
    osutils,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
27
    tests,
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
28
    trace,
3735.32.20 by John Arbash Meinel
groupcompress now copies the blocks exactly as they were given.
29
    versionedfile,
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
30
    )
0.23.58 by John Arbash Meinel
fix up the failing tests.
31
from bzrlib.osutils import sha_string
4913.2.24 by John Arbash Meinel
Track down a few more import typos.
32
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
3735.31.1 by John Arbash Meinel
Bring the groupcompress plugin into the brisbane-core branch.
33
34
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
35
def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress.

    TestAllGroupCompressors is multiplied into one scenario per available
    compressor implementation: the pure-python one always, plus the
    compiled (C/Pyrex) one when the extension is built.
    """
    is_multiplied = tests.condition_isinstance(TestAllGroupCompressors)
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, is_multiplied)
    scenarios = [('python',
                  {'compressor': groupcompress.PythonGroupCompressor})]
    if compiled_groupcompress_feature.available():
        scenarios.append(
            ('C', {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
46
47
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
48
class TestGroupCompressor(tests.TestCase):
    """Base class providing chunk-comparison helpers for compressor tests."""

    def _chunks_to_repr_lines(self, chunks):
        # Join the chunks into one string, split it back into lines, and
        # repr() each line so non-ascii bytes stay printable in a diff.
        text = ''.join(chunks)
        return '\n'.join(repr(line) for line in text.split('\n'))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        Both sides are transformed back into lines and then repr()'d so
        that non-ascii characters show up readably in the failure diff.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))
64
65
66
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests run against every GroupCompressor implementation."""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        # A freshly created compressor holds no output at all.
        gc = self.compressor()
        self.assertEqual([], gc.chunks)

    def test_one_nosha_delta(self):
        # diff against NUKK
        gc = self.compressor()
        sha1, start_point, end_point, _ = gc.compress(
            ('label',), 'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        # 'f' marker, length byte, then the fulltext itself.
        expected_bytes = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_bytes, ''.join(gc.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_bytes)), end_point)

    def test_empty_content(self):
        gc = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = gc.compress(
            ('empty',), '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, gc.endpoint)
        self.assertEqual([], gc.chunks)
        # Even after adding some content
        gc.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(gc.endpoint > 0)
        sha1, start_point, end_point, kind = gc.compress(
            ('empty2',), '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        gc = self.compressor()
        sha1_1, _, _, _ = gc.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        sha1_2, _, end_point, _ = gc.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         gc.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         gc.extract(('newlabel',)))

    def test_pop_last(self):
        gc = self.compressor()
        gc.compress(('key1',), 'some text\nfor the first entry\n', None)
        chunks_before = list(gc.chunks)
        gc.compress(('key2',), 'some text\nfor the second entry\n', None)
        gc.pop_last()
        # Popping the last compression must restore the chunk stream exactly.
        self.assertEqual(chunks_before, gc.chunks)
134
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
135
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
136
class TestPyrexGroupCompressor(TestGroupCompressor):
    """Bytecode-exact tests for the compiled (Pyrex) compressor."""

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        gc = self.compressor()
        gc.compress(('label',),
                    'strange\n'
                    'common very very long line\n'
                    'plus more text\n', None)
        gc.compress(('newlabel',),
                    'common very very long line\n'
                    'plus more text\n'
                    'different\n'
                    'moredifferent\n', None)
        gc.compress(('label3',),
                    'new\n'
                    'common very very long line\n'
                    'plus more text\n'
                    'different\n'
                    'moredifferent\n', None)
        # Three heavily-overlapping texts should compress roughly 1.9:1.
        self.assertAlmostEqual(1.9, gc.ratio(), 1)

    def test_two_nosha_delta(self):
        gc = self.compressor()
        sha1_1, _, _, _ = gc.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected = list(gc.chunks)
        sha1_2, start_point, end_point, _ = gc.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected.extend([
            'd\x0f',            # 'delta' marker, delta length
            '\x36',             # source and target length
            '\x91\x0a\x2c',     # copy, offset 0x0a, len 0x2c
            '\x0adifferent\n',  # insert 10 bytes: 'different' + newline
            ])
        self.assertEqualDiffEncoded(expected, gc.chunks)
        self.assertEqual(sum(map(len, expected)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both earlier texts.
        gc = self.compressor()
        sha1_1, _, _, _ = gc.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = gc.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected = list(gc.chunks)
        sha1_3, start_point, end_point, _ = gc.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected.extend([
            'd\x0b',        # 'delta' marker, delta length
            '\x5f'          # source and target length
            '\x03new',      # insert 'new'
            '\x91\x09\x31'  # copy of first text's 'common' range (offset 0x09, 0x31 bytes)
            '\x91\x3c\x2b'  # copy of second text's 'different' range (offset 0x3c, 0x2b bytes)
            ])
        self.assertEqualDiffEncoded(expected, gc.chunks)
        self.assertEqual(sum(map(len, expected)), end_point)
214
215
216
class TestPythonGroupCompressor(TestGroupCompressor):
    """Bytecode-exact tests for the pure-python compressor."""

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        gc = self.compressor()
        gc.compress(('label',),
                    'strange\n'
                    'common very very long line\n'
                    'plus more text\n', None)
        gc.compress(('newlabel',),
                    'common very very long line\n'
                    'plus more text\n'
                    'different\n'
                    'moredifferent\n', None)
        gc.compress(('label3',),
                    'new\n'
                    'common very very long line\n'
                    'plus more text\n'
                    'different\n'
                    'moredifferent\n', None)
        # Three heavily-overlapping texts should compress roughly 1.9:1.
        self.assertAlmostEqual(1.9, gc.ratio(), 1)

    def test_two_nosha_delta(self):
        gc = self.compressor()
        sha1_1, _, _, _ = gc.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected = list(gc.chunks)
        sha1_2, start_point, end_point, _ = gc.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected.extend([
            'd\x0f',            # 'delta' marker, delta length
            '\x36',             # target length
            '\x91\x0a\x2c',     # copy, offset 0x0a, len 0x2c
            '\x0adifferent\n',  # insert 10 bytes: 'different' + newline
            ])
        self.assertEqualDiffEncoded(expected, gc.chunks)
        self.assertEqual(sum(map(len, expected)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both earlier texts.
        gc = self.compressor()
        sha1_1, _, _, _ = gc.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = gc.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected = list(gc.chunks)
        sha1_3, start_point, end_point, _ = gc.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected.extend([
            'd\x0c',        # 'delta' marker, delta length
            '\x5f'          # target length
            '\x04new\n',    # insert 'new\n'
            '\x91\x0a\x30'  # copy of first text's 'common' range (offset 0x0a, 0x30 bytes)
            '\x91\x3c\x2b'  # copy of second text's 'different' range (offset 0x3c, 0x2b bytes)
            ])
        self.assertEqualDiffEncoded(expected, gc.chunks)
        self.assertEqual(sum(map(len, expected)), end_point)
293
294
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
295
class TestGroupCompressBlock(tests.TestCase):
296
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
297
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: A dict mapping record key -> text bytes.
        :return: (locs, block) where locs maps each key to its
            (start, end) byte range inside the block's content.
        """
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = {}
        for key, (start, _, end, _) in compressor.labels_deltas.iteritems():
            locs[key] = (start, end)
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
310
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
311
    def test_from_empty_bytes(self):
        # An empty string is not a valid serialised block.
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')
314
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
315
    def test_from_minimal_bytes(self):
        # A header with zero compressed and uncompressed length is the
        # smallest valid serialised block.
        block = groupcompress.GroupCompressBlock.from_bytes('gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        # Content is expanded lazily: nothing decompressed at parse time.
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
325
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
326
    def test_from_invalid(self):
        # Garbage that does not start with a recognised header must be
        # rejected.
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')
330
3735.38.4 by John Arbash Meinel
Another disk format change.
331
    def test_from_bytes(self):
        content = 'a tiny bit of content\n'
        z_content = zlib.compress(content)
        # Hand-build a serialised block: magic, compressed length,
        # uncompressed length, then the compressed payload.
        z_bytes = 'gcb1z\n%d\n%d\n%s' % (len(z_content), len(content),
                                         z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(z_bytes)
        # Parsing records the lengths but defers decompression.
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
349
5439.2.1 by John Arbash Meinel
Change GroupCompressBlock to work in self._z_compress_chunks
350
    def test_to_chunks(self):
351
        content_chunks = ['this is some content\n',
352
                          'this content will be compressed\n']
353
        content_len = sum(map(len, content_chunks))
354
        content = ''.join(content_chunks)
355
        gcb = groupcompress.GroupCompressBlock()
356
        gcb.set_chunked_content(content_chunks, content_len)
357
        total_len, block_chunks = gcb.to_chunks()
358
        block_bytes = ''.join(block_chunks)
359
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
360
        self.assertEqual(total_len, len(block_bytes))
361
        self.assertEqual(gcb._content_length, content_len)
362
        expected_header =('gcb1z\n' # group compress block v1 zlib
363
                          '%d\n' # Length of compressed content
364
                          '%d\n' # Length of uncompressed content
365
                         ) % (gcb._z_content_length, gcb._content_length)
366
        # The first chunk should be the header chunk. It is small, fixed size,
367
        # and there is no compelling reason to split it up
368
        self.assertEqual(expected_header, block_chunks[0])
369
        self.assertStartsWith(block_bytes, expected_header)
370
        remaining_bytes = block_bytes[len(expected_header):]
371
        raw_bytes = zlib.decompress(remaining_bytes)
372
        self.assertEqual(content, raw_bytes)
373
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
374
    def test_to_bytes(self):
        """to_bytes() serialises header + zlib content, and the chunked API
        produces a byte-identical serialisation.
        """
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        # Named 'serialised' rather than 'bytes' to avoid shadowing the
        # builtin of that name.
        serialised = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(serialised, expected_header)
        remaining_bytes = serialised[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        self.assertEqual(serialised, gcb.to_bytes())
399
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
400
    def _make_partial_decompression_block(self):
        """Build a (content, block) pair suited to partial decompression.

        We need a sufficient amount of data so that zlib.decompress has
        partial decompression to work with. Most auto-generated data
        compresses a bit too well, so we interleave compressible text with
        sha1 hashes.

        :return: (content, block) where content is the 158634-byte fulltext
            and block is a GroupCompressBlock primed with the zlib compressed
            form but nothing extracted yet.
        """
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        # Nothing has been extracted yet.
        self.assertIs(None, block._content)
        return content, block

    def test_partial_decomp(self):
        content, block = self._make_partial_decompression_block()
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content, block = self._make_partial_decompression_block()
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)
472
4300.1.1 by John Arbash Meinel
Add the ability to convert a gc block into 'human readable' form.
473
    def test__dump(self):
        """_dump() reports a fulltext record then a delta against it."""
        shared = 'some duplicate content\nwhich is sufficiently long\n'
        text_one = shared + '1 unique\n'
        text_two = shared + '2 extra special\n'
        key_to_text = {('1',): text_one, ('2',): text_two}
        locs, block = self.make_block(key_to_text)
        expected = [
            ('f', len(text_one)),
            ('d', 21, len(text_two),
             [('c', 2, len(shared)),
              ('i', len('2 extra special\n'), ''),
             ]),
            ]
        self.assertEqual(expected, block._dump())
484
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
485
4744.2.5 by John Arbash Meinel
Change to a generic 'VersionedFiles.clear_cache()' api.
486
class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):
    """Base class supplying a groupcompress VersionedFiles factory."""

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        """Create a groupcompress VersionedFiles over a transport at dir.

        :param create_graph: whether the index should record a parent graph.
        :param do_cleanup: if True, close the pack group automatically when
            the test finishes.
        """
        t = self.get_transport(dir)
        t.ensure_base()
        factory = groupcompress.make_pack_factory(
            graph=create_graph, delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)
        vf = factory(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf
499
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
500
501
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=None):
        """Build an on-transport BTreeGraphIndex containing nodes.

        :param nodes: iterable of (key, references, value) tuples; defaults
            to empty. (A None sentinel is used rather than a mutable []
            default to avoid the shared-mutable-default pitfall.)
        """
        if nodes is None:
            nodes = []
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        """Make an index where 'tip' references a parent that is absent."""
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index
519
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
520
    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        def requested_keys(a_vf, key_order):
            stream = a_vf.get_record_stream(key_order, 'as-requested', False)
            return [record.key for record in stream]
        forward = [('a',), ('b',), ('c',), ('d',)]
        shuffled = [('b',), ('a',), ('d',), ('c',)]
        vf = self.make_test_vf(False, dir='source')
        for key in forward:
            vf.add_lines(key, (), ['lines\n'])
        vf.writer.end()
        # Records come back in exactly the order they were asked for.
        self.assertEqual(forward, requested_keys(vf, forward))
        self.assertEqual(shuffled, requested_keys(vf, shuffled))

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            shuffled, 'as-requested', False))
        vf2.writer.end()

        self.assertEqual(forward, requested_keys(vf2, forward))
        self.assertEqual(shuffled, requested_keys(vf2, shuffled))
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
552
4665.3.9 by John Arbash Meinel
Start doing some work to make sure that we call _check_rebuild_block
553
    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        all_keys = [(r,) for r in 'abcdefgh']
        block_bytes = {}
        num_records = 0
        for record in vf.get_record_stream(all_keys, 'unordered', False):
            num_records += 1
            # The first record of each group carries the block; the rest are
            # references into it.
            if record.key in [('a',), ('e',)]:
                expected_kind = 'groupcompress-block'
            else:
                expected_kind = 'groupcompress-block-ref'
            self.assertEqual(expected_kind, record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
        self.assertEqual(8, num_records)
        for r in 'abcd':
            self.assertIs(block_bytes[(r,)], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[(r,)], block_bytes[('e',)])
        for r in 'efgh':
            self.assertIs(block_bytes[(r,)], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[(r,)], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream(all_keys,
                                               'groupcompress', False):
                # Mark the block as already 'full enough' so the target keeps
                # it rather than rebuilding it.
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream(all_keys, 'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)
614
4665.3.9 by John Arbash Meinel
Start doing some work to make sure that we call _check_rebuild_block
615
    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        all_keys = [(r,) for r in 'abcdefgh']
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(
            vf.get_record_stream(all_keys, 'groupcompress', False))
        stream = vf2.get_record_stream(all_keys, 'groupcompress', False)
        vf2.writer.end()
        # All of the records should be recombined into a single block
        shared_block = None
        num_records = 0
        for record in stream:
            num_records += 1
            if shared_block is None:
                shared_block = record._manager._block
            else:
                self.assertIs(shared_block, record._manager._block)
        self.assertEqual(8, num_records)
652
3735.32.21 by John Arbash Meinel
We now have a 'reuse_blocks=False' flag for autopack et al.
653
    def test__insert_record_stream_no_reuse_block(self):
        """With reuse_blocks=False the target repacks into one new block."""
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        num_records = 0
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        # The loop above is vacuous if the stream yields nothing, so also
        # check that all 8 records actually came back.
        self.assertEqual(8, num_records)
694
4343.3.20 by John Arbash Meinel
Copy the track_external_parent_refs tests over to GCVF.
695
    def test_add_missing_noncompression_parent_unvalidated_index(self):
        """Scanning an unvalidated index exposes its missing parents."""
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        missing = index.get_missing_parents()
        self.assertEqual(frozenset([('missing-parent',)]), missing)
704
705
    def test_track_external_parent_refs(self):
        """Parents that are referenced but never added show up as missing."""
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        new_node = (('new-key',), '2 10 2 10',
                    [(('parent-1',), ('parent-2',))])
        index.add_records([new_node])
        expected = frozenset([('parent-1',), ('parent-2',)])
        self.assertEqual(expected, index.get_missing_parents())
718
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
719
    def make_source_with_b(self, a_parent, path):
        """Create a vf at path holding keys 'a' and 'b'.

        :param a_parent: if True, record ('a',) as the parent of ('b',).
        """
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        b_parents = (('a',),) if a_parent else ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source
728
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
729
    def do_inconsistent_inserts(self, inconsistency_fatal):
        """Insert 'b' twice from sources whose parent details disagree."""
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for attempt in range(2):
            # Second source records 'a' as the parent of 'b'; first does not.
            source = self.make_source_with_b(attempt == 1,
                                             'source%s' % attempt)
            stream = source.get_record_stream([('b',)], 'unordered', False)
            target.insert_record_stream(stream)
736
4465.2.3 by Aaron Bentley
Update to change redundant inserts into a warning.
737
    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        seen_warnings = []
        def capture_warning(template, args):
            seen_warnings.append(template % args)
        # Temporarily divert trace.warning so the message can be inspected.
        saved_warning = trace.warning
        trace.warning = capture_warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = saved_warning
        expected = ["inconsistent details in skipped record: ('b',)"
                    " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"]
        self.assertEqual(expected, seen_warnings)
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
751
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
752
    def test_inconsistent_redundant_inserts_raises(self):
        """With inconsistency_fatal=True the duplicate insert is an error."""
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        expected_re = ("Knit.* corrupt: inconsistent details"
                       " in add_records:"
                       " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                       " 0 8', \(\(\('a',\),\),\)\)")
        self.assertContainsRe(str(e), expected_re)
759
4744.2.5 by John Arbash Meinel
Change to a generic 'VersionedFiles.clear_cache()' api.
760
    def test_clear_cache(self):
        """clear_cache() empties the group cache populated by reads."""
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        # Consuming a record stream populates vf._group_cache.
        list(vf.get_record_stream([('a',), ('b',)], 'unordered', True))
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))
769
770
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
771
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
772
class StubGCVF(object):
    """Minimal stand-in for a GroupCompressVersionedFiles.

    Supplies just what _BatchingBlockFetcher touches: a group cache dict
    and a _get_blocks that replays canned results.
    """

    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []

    def _get_blocks(self, read_memos):
        """Ignore read_memos; replay the canned block list."""
        return iter(self._canned_get_blocks)
779
780
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        self.assertEqual(50, fetcher.add_key(('key',)))
        self.assertEqual([('key',)], fetcher.keys)
        self.assertEqual([read_memo], fetcher.memos_to_get)
799
800
    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = 0
        for key in [('key1',), ('key2',)]:
            total_size = fetcher.add_key(key)
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], fetcher.keys)
        self.assertEqual([read_memo], fetcher.memos_to_get)
816
817
    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        # Pre-populate the cache so the memo counts as already present.
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        self.assertEqual(0, fetcher.add_key(('key',)))
        self.assertEqual([('key',)], fetcher.keys)
        self.assertEqual([], fetcher.memos_to_get)
831
4634.3.21 by Andrew Bennetts
Direct tests now have complete line coverage of _BatchingBlockFetcher (except for the assertion).
832
    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        fetcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(fetcher.yield_factories()))
836
837
    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        fetcher.add_key(('key1',))
        fetcher.add_key(('key2',))
        factories = list(fetcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        self.assertEqual([('key1',), ('key2',)],
                         [f.key for f in factories])
        self.assertEqual(['groupcompress-block', 'groupcompress-block'],
                         [f.storage_kind for f in factories])
857
858
    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        fetcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        fetcher.add_key(('key',))
        # Without full_flush nothing from the final block is yielded yet.
        self.assertEqual([], list(fetcher.yield_factories()))
        factories = list(fetcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
875
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
876
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
877
class TestLazyGroupCompress(tests.TestCaseWithTransport):
    """Tests for _LazyGroupContentManager and the groupcompress-block
    wire format it produces/consumes.
    """

    # Sample texts that share common lines, so the compressor has
    # something to delta-compress between entries.
    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: A dict mapping key tuples to text strings.
        :return: ({key: (start, end)}, GroupCompressBlock) where the offsets
            give each key's byte range within the block content.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # labels_deltas values are (start, _, end, _); keep just the
        # (start, end) byte range per key.  (The generator expression binds
        # its own start/end, so no outer initialisation is needed.)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Round-trip through the serialized bytes so the block under test is
        # in the same state as one freshly read back.
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        """Register key's (start, end) range from locations with manager."""
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        """Build a block from texts and a manager referencing every key."""
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        """Records come back as fulltexts, in the order factories were added."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        """With no keys referenced, _wire_bytes strips the block entirely."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n'  # len(compress(''))
                         '0\n'  # len('')
                         '%d\n' # compressed block len
                         '%s'   # zheader
                         '%s'   # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        """_wire_bytes lays out storage kind, header lengths, zlib'd header
        of key entries, then the block bytes."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        """A manager round-trips through _wire_bytes/from_bytes."""
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        """If every key is in use, the block is kept as-is."""
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        # Keys from two different file-id prefixes make the block 'mixed',
        # so the mixed-block threshold applies.
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())
5365.4.1 by John Arbash Meinel
Find a case where we are wasting a bit of memory.
1093
1094
1095
class Test_GCBuildDetails(tests.TestCase):
    """Tests for the compact _GCBuildDetails build-details object."""

    def _make_details(self):
        # Parents plus the ('INDEX', group_start, group_len, start, end)
        # style location tuple that _GCBuildDetails packs together.
        return groupcompress._GCBuildDetails(
            (('parent1',), ('parent2',)), ('INDEX', 10, 20, 0, 5))

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples, while still indexing like the old 4-tuple.
        bd = self._make_details()
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        # Compression Parent is always None
        self.assertEqual(None, bd[1])
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        # Record details
        self.assertEqual(('group', None), bd[3])

    def test__repr__(self):
        bd = self._make_details()
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))
1114