# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
from bzrlib.tests.scenarios import load_tests_apply_scenarios
36
def group_compress_implementation_scenarios():
    """Return test scenarios, one per available GroupCompressor.

    Each scenario is a ``(name, {'compressor': class})`` pair.  The pure
    Python implementation is always included; the compiled (Pyrex/C)
    implementation is added only when the extension is available.
    """
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios
44
45
46
# Standard hook for bzr's test loader: multiply the scenario-aware test
# classes in this module across the scenarios they declare (see
# group_compress_implementation_scenarios) -- presumably the stock
# bzrlib.tests.scenarios contract; confirm against that module.
load_tests = load_tests_apply_scenarios
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
47
48
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
49
class TestGroupCompressor(tests.TestCase):
    """Base class providing chunk-comparison helpers for compressor tests."""

    def _chunks_to_repr_lines(self, chunks):
        # Join the chunks, split back into lines, and repr() each line so
        # that non-ascii/control bytes show up readably in failure output.
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))
65
66
67
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        """A fresh compressor holds no chunks."""
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        """The first text is stored as a fulltext ('f') record."""
        # diff against NUKK
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        """Compressing empty bytes yields the null record and adds nothing."""
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        """pop_last() discards exactly the chunks of the last compress()."""
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)
136
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
137
3735.40.4 by John Arbash Meinel
Factor out tests that rely on the exact bytecode.
138
class TestPyrexGroupCompressor(TestGroupCompressor):
    """Bytecode-exact tests for the compiled (Pyrex) compressor."""

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
216
217
218
class TestPythonGroupCompressor(TestGroupCompressor):
    """Bytecode-exact tests for the pure-Python compressor."""

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
295
296
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
297
class TestGroupCompressBlock(tests.TestCase):
298
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
299
    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        start = 0
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # Map each key to its (start, end) byte range within the block.
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
3735.32.15 by John Arbash Meinel
Change the GroupCompressBlock code to allow not recording 'end'.
312
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
313
    def test_from_empty_bytes(self):
        """from_bytes('') raises ValueError rather than returning a block."""
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')
316
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
317
    def test_from_minimal_bytes(self):
        """A header-only block parses, and content is lazily materialized."""
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x
0.25.4 by John Arbash Meinel
We at least have the rudimentary ability to encode and decode values.
327
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
328
    def test_from_invalid(self):
        """An unrecognized header raises ValueError."""
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')
332
3735.38.4 by John Arbash Meinel
Another disk format change.
333
    def test_from_bytes(self):
        """A well-formed block parses lazily: lengths set, content deferred."""
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)
351
5439.2.1 by John Arbash Meinel
Change GroupCompressBlock to work in self._z_compress_chunks
352
    def test_to_chunks(self):
        """to_chunks() returns (total_len, chunks) with the header first."""
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
375
0.25.2 by John Arbash Meinel
First cut at meta-info as text form.
376
    def test_to_bytes(self):
        """to_bytes() emits header + zlib body; chunked input matches."""
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                 len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)
401
3735.32.7 by John Arbash Meinel
Implement partial decompression support.
402
    def test_partial_decomp(self):
        """_ensure_content(n) only decompresses roughly n bytes at a time.

        Verifies that repeated, growing requests extend _content
        incrementally, and that the decompressor is released once the whole
        content has been extracted.
        """
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        # Pin the exact sizes so any change in the generated data is noticed.
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        # Hand-assemble a block around the compressed bytes, bypassing
        # from_bytes(), so only the lazy decompression path is exercised.
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
446
4744.2.3 by John Arbash Meinel
change the GroupcompressBlock code a bit.
447
    def test__ensure_all_content(self):
        """A single _ensure_content() for the full length gets everything.

        When the first request already covers the entire content, the
        decompressor should be consumed completely and released.
        """
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        # Pin the exact sizes so any change in the generated data is noticed.
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        # Hand-assemble a block around the compressed bytes, bypassing
        # from_bytes(), so only the decompression path is exercised.
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)
474
4300.1.1 by John Arbash Meinel
Add the ability to convert a gc block into 'human readable' form.
475
    def test__dump(self):
        """_dump() renders the block's records in human-readable form.

        NOTE(review): the expected structure suggests 'f' = fulltext record,
        'd' = delta record composed of 'c' (copy) and 'i' (insert)
        instructions — confirm against GroupCompressBlock._dump.
        """
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        # Two texts sharing a long common prefix, so the second is stored as
        # a delta against the first.
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())
486
3735.32.3 by John Arbash Meinel
Start doing some direct GCVF tests.
487
4744.2.5 by John Arbash Meinel
Change to a generic 'VersionedFiles.clear_cache()' api.
488
class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):
    """Base class supplying a helper to build groupcompress VersionedFiles."""

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        """Create a pack-based groupcompress VersionedFiles in ``dir``.

        :param create_graph: If True, the index records a parent graph.
        :param do_cleanup: When True, register a cleanup that shuts the
            pack group down at the end of the test.
        :param inconsistency_fatal: Whether redundant-but-different inserts
            raise (True) or merely warn (False).
        """
        transport = self.get_transport(dir)
        transport.ensure_base()
        factory = groupcompress.make_pack_factory(
            graph=create_graph, delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)
        vf = factory(transport)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf
501
3735.32.8 by John Arbash Meinel
Some tests for the LazyGroupCompressFactory
502
503
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
    """Tests for the groupcompress VersionedFiles implementation."""

    def make_g_index(self, name, ref_lists=0, nodes=()):
        """Build a btree graph index named ``name`` on the test transport.

        :param ref_lists: Number of reference lists in the index.
        :param nodes: Iterable of (key, references, value) tuples to add.
            The default is an immutable tuple to avoid the shared
            mutable-default-argument pitfall.
        :return: A BTreeGraphIndex reading back the finished index.
        """
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        """Make an index whose 'tip' node references a 'missing-parent'
        key that is not present in the index itself.
        """
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def _grouped_stream(self, revision_ids, first_parents=()):
        """Yield FulltextContentFactory records forming one linear chain.

        Each record's text differs only in its revision_id, so a whole
        stream compresses extremely well into a single group.
        """
        parents = first_parents
        for revision_id in revision_ids:
            key = (revision_id,)
            record = versionedfile.FulltextContentFactory(
                key, parents, None,
                'some content that is\n'
                'identical except for\n'
                'revision_id:%s\n' % (revision_id,))
            yield record
            parents = (key,)

    def test_get_record_stream_as_requested(self):
        """'as-requested' ordering returns records in exactly the key order
        given, both from a fresh VF and after repacking into another VF.
        """
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        vf.writer.end()
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        """Copying records between VFs keeps the original compressed blocks
        when the blocks are considered full enough.
        """
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            # The first record of each group carries the whole block; the
            # rest only reference it.
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            # Mark each block as already 'full enough' so the inserter will
            # reuse it rather than rebuilding.
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        """Under-utilized blocks are recombined during insert_record_stream."""
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        vf2.writer.end()
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        """reuse_blocks=False forces everything into a single new block."""
        vf = self.make_test_vf(True, dir='source')
        # One group, a-d
        vf.insert_record_stream(self._grouped_stream(['a', 'b', 'c', 'd']))
        # Second group, e-h
        vf.insert_record_stream(self._grouped_stream(['e', 'f', 'g', 'h'],
                                                     first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
                                        [(r,) for r in 'abcdefgh'],
                                        'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        vf2.writer.end()
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        """Scanning an unvalidated index records externally-missing parents."""
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        """add_records() tracks parents that are not present in the index."""
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        """Make a VF at ``path`` holding 'a' and 'b'.

        :param a_parent: If True, record ('a',) as the parent of ('b',);
            otherwise 'b' has no parents.
        """
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        """Insert the same key twice with conflicting parent details."""
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            # The second source records different parents for ('b',).
            source = self.make_source_with_b(x == 1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        # Temporarily capture trace.warning; restored in the finally block.
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        """With inconsistency_fatal=True the redundant insert raises."""
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), r"Knit.* corrupt: inconsistent details"
                              r" in add_records:"
                              r" \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              r" 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        """clear_cache() empties the VF's group cache."""
        vf = self.make_source_with_b(True, 'source')
        vf.writer.end()
        # Reading records populates the cache as a side effect.
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))
771
772
4465.2.4 by Aaron Bentley
Switch between warn and raise depending on inconsistent_fatal.
773
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
774
class StubGCVF(object):
    """Minimal stand-in for a GroupCompressVersionedFiles.

    Exposes only what _BatchingBlockFetcher touches: a ``_group_cache``
    dict and a ``_get_blocks`` that replays canned results.
    """

    def __init__(self, canned_get_blocks=None):
        """:param canned_get_blocks: Optional list of (read_memo, block)
            pairs that _get_blocks will yield.
        """
        self._group_cache = {}
        # Test for None explicitly: the old ``canned_get_blocks or []``
        # silently replaced a caller-supplied empty list with a new one.
        if canned_get_blocks is None:
            canned_get_blocks = []
        self._canned_get_blocks = canned_get_blocks

    def _get_blocks(self, read_memos):
        """Ignore read_memos and yield the canned (memo, block) pairs."""
        return iter(self._canned_get_blocks)
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
780
    
781
782
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw bytes needed.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        # The batch grew by the read_memo's size (50 bytes).
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        # The second add_key reuses the already-pending memo, so the total
        # does not double.
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        # Pre-populate the cache so the fetcher should not schedule a read.
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        # Without full_flush the final (possibly still-growing) block is
        # withheld.
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
877
4634.3.20 by Andrew Bennetts
Some basic whitebox unit tests for _BatchingBlockFetcher.
878
3735.32.14 by John Arbash Meinel
Move the tests over to testing the LazyGroupContentManager object.
879
class TestLazyGroupCompress(tests.TestCaseWithTransport):
    """Tests for _LazyGroupContentManager and its wire format."""

    # Texts share most of their bytes so the compressor actually produces
    # delta-compressed groups rather than trivial fulltext-only blocks.
    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts.

        :param key_to_text: A dict mapping key tuples to text strings.
        :return: A tuple of (locations, block) where locations maps each key
            to its (start, end) offsets within the block.
        """
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        # The generator expression binds its own 'start'/'end' from
        # labels_deltas; nothing else is needed to compute the offsets.
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Round-trip through the serialized form so tests see a block in the
        # same state as one read from disk.
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        """Register a factory for key (with no parents) on the manager."""
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        """Build a block from texts and a manager referencing every key."""
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())
5365.4.1 by John Arbash Meinel
Find a case where we are wasting a bit of memory.
1095
1096
1097
class Test_GCBuildDetails(tests.TestCase):
    """Tests for the tuple-compatible _GCBuildDetails record."""

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        parents = (('parent1',), ('parent2',))
        index_memo = ('INDEX', 10, 20, 0, 5)
        details = groupcompress._GCBuildDetails(parents, index_memo)
        self.assertEqual(4, len(details))
        self.assertEqual(index_memo, details[0])
        # Compression Parent is always None
        self.assertEqual(None, details[1])
        self.assertEqual(parents, details[2])
        # Record details
        self.assertEqual(('group', None), details[3])

    def test__repr__(self):
        details = groupcompress._GCBuildDetails(
            (('parent1',), ('parent2',)), ('INDEX', 10, 20, 0, 5))
        expected = ("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                    " (('parent1',), ('parent2',)))")
        self.assertEqual(expected, repr(details))
1116