~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_groupcompress.py

Change GroupCompressor.compress() to return the start_point.

Also, mark empty content with start=end=0.
This also gives us a good starting point for handling duplicate entries (if we
find that makes a difference).
From experimentation, using 0,0 for empty entries actually makes a big difference
in the text index, mostly because about half of all entries have no content
(all of the directory records, for example), so it allows the compression
to shrink the index a bit.

Show diffs side-by-side

added added

removed removed

Lines of Context:
42
42
    def test_one_nosha_delta(self):
43
43
        # diff against NUKK
44
44
        compressor = groupcompress.GroupCompressor()
45
 
        sha1, end_point, _, _ = compressor.compress(('label',),
 
45
        sha1, start_point, end_point, _, _ = compressor.compress(('label',),
46
46
            'strange\ncommon\n', None)
47
47
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
48
48
        expected_lines = [
49
49
            'f', '\x0f', 'strange\ncommon\n',
50
50
            ]
51
51
        self.assertEqual(expected_lines, compressor.lines)
 
52
        self.assertEqual(0, start_point)
52
53
        self.assertEqual(sum(map(len, expected_lines)), end_point)
53
54
 
 
55
    def test_empty_content(self):
 
56
        compressor = groupcompress.GroupCompressor()
 
57
        # Adding empty bytes should return the 'null' record
 
58
        sha1, start_point, end_point, kind, _ = compressor.compress(('empty',),
 
59
            '', None)
 
60
        self.assertEqual(0, start_point)
 
61
        self.assertEqual(0, end_point)
 
62
        self.assertEqual('fulltext', kind)
 
63
        self.assertEqual(groupcompress._null_sha1, sha1)
 
64
        self.assertEqual(0, compressor.endpoint)
 
65
        self.assertEqual([], compressor.lines)
 
66
        # Even after adding some content
 
67
        compressor.compress(('content',), 'some\nbytes\n', None)
 
68
        self.assertTrue(compressor.endpoint > 0)
 
69
        sha1, start_point, end_point, kind, _ = compressor.compress(('empty2',),
 
70
            '', None)
 
71
        self.assertEqual(0, start_point)
 
72
        self.assertEqual(0, end_point)
 
73
        self.assertEqual('fulltext', kind)
 
74
        self.assertEqual(groupcompress._null_sha1, sha1)
 
75
 
54
76
    def _chunks_to_repr_lines(self, chunks):
55
77
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
56
78
 
68
90
 
69
91
    def test_two_nosha_delta(self):
70
92
        compressor = groupcompress.GroupCompressor()
71
 
        sha1_1, _, _, _ = compressor.compress(('label',),
 
93
        sha1_1, _, _, _, _ = compressor.compress(('label',),
72
94
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
73
95
        expected_lines = list(compressor.lines)
74
 
        sha1_2, end_point, _, _ = compressor.compress(('newlabel',),
 
96
        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
75
97
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
76
98
        self.assertEqual(sha_string('common long line\n'
77
99
                                    'that needs a 16 byte match\n'
93
115
        # The first interesting test: make a change that should use lines from
94
116
        # both parents.
95
117
        compressor = groupcompress.GroupCompressor()
96
 
        sha1_1, end_point, _, _ = compressor.compress(('label',),
 
118
        sha1_1, _, _, _, _ = compressor.compress(('label',),
97
119
            'strange\ncommon very very long line\nwith some extra text\n', None)
98
 
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
 
120
        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
99
121
            'different\nmoredifferent\nand then some more\n', None)
100
122
        expected_lines = list(compressor.lines)
101
 
        sha1_3, end_point, _, _ = compressor.compress(('label3',),
 
123
        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
102
124
            'new\ncommon very very long line\nwith some extra text\n'
103
125
            'different\nmoredifferent\nand then some more\n',
104
126
            None)
137
159
        # Knit fetching will try to reconstruct texts locally which results in
138
160
        # reading something that is in the compressor stream already.
139
161
        compressor = groupcompress.GroupCompressor()
140
 
        sha1_1, _, _, _ = compressor.compress(('label',),
 
162
        sha1_1, _, _, _, _ = compressor.compress(('label',),
141
163
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
142
164
        expected_lines = list(compressor.lines)
143
 
        sha1_2, end_point, _, _ = compressor.compress(('newlabel',),
 
165
        sha1_2, _, end_point, _, _ = compressor.compress(('newlabel',),
144
166
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
145
167
        # get the first out
146
168
        self.assertEqual(('strange\ncommon long line\n'