# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for repositories that support CHK indices."""

from bzrlib import (
    btree_index,
    errors,
    osutils,
    repository,
    )
from bzrlib.remote import RemoteRepository
from bzrlib.versionedfile import VersionedFiles
from bzrlib.tests import TestNotApplicable
from bzrlib.tests.per_repository_chk import TestCaseWithRepositoryCHK


class TestCHKSupport(TestCaseWithRepositoryCHK):
    """Tests of the chk_bytes store exposed by CHK-capable repositories."""

    def test_chk_bytes_attribute_is_VersionedFiles(self):
        """repo.chk_bytes is a VersionedFiles instance."""
        repo = self.make_repository('.')
        self.assertIsInstance(repo.chk_bytes, VersionedFiles)

    def test_add_bytes_to_chk_bytes_store(self):
        """Lines added to chk_bytes are keyed by sha1 and stay visible.

        The key is checked inside the write group, again after an
        unlock/lock pair, and once more after reopening the repository.
        """
        expected_keys = set(
            [('sha1:4e48e2c9a3d2ca8a708cb0cc545700544efb5021',)])
        repo = self.make_repository('.')
        repo.lock_write()
        try:
            repo.start_write_group()
            try:
                sha1, length, _ = repo.chk_bytes.add_lines((None,),
                    None, ["foo\n", "bar\n"], random_id=True)
                self.assertEqual('4e48e2c9a3d2ca8a708cb0cc545700544efb5021',
                    sha1)
                self.assertEqual(expected_keys, repo.chk_bytes.keys())
            except:
                # Deliberately bare: abort the write group on any failure,
                # then re-raise so the test still fails.
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
        finally:
            repo.unlock()
        # And after an unlock/lock pair
        repo.lock_read()
        try:
            self.assertEqual(expected_keys, repo.chk_bytes.keys())
        finally:
            repo.unlock()
        # and reopening
        repo = repo.bzrdir.open_repository()
        repo.lock_read()
        try:
            self.assertEqual(expected_keys, repo.chk_bytes.keys())
        finally:
            repo.unlock()

    def test_pack_preserves_chk_bytes_store(self):
        """Packing keeps every chk_bytes key that was committed.

        An internal node and the leaf it points at are committed in two
        separate write groups before the repository is packed.
        """
        leaf_lines = ["chkleaf:\n", "0\n", "1\n", "0\n", "\n"]
        leaf_sha1 = osutils.sha_strings(leaf_lines)
        node_lines = ["chknode:\n", "0\n", "1\n", "1\n", "foo\n",
                      "\x00sha1:%s\n" % (leaf_sha1,)]
        node_sha1 = osutils.sha_strings(node_lines)
        expected_set = set([('sha1:' + leaf_sha1,), ('sha1:' + node_sha1,)])
        repo = self.make_repository('.')
        repo.lock_write()
        try:
            repo.start_write_group()
            try:
                # Internal node pointing at a leaf.
                repo.chk_bytes.add_lines((None,), None, node_lines,
                    random_id=True)
            except:
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
            repo.start_write_group()
            try:
                # Leaf in a separate pack.
                repo.chk_bytes.add_lines((None,), None, leaf_lines,
                    random_id=True)
            except:
                repo.abort_write_group()
                raise
            else:
                repo.commit_write_group()
            # The operation under test: both records must survive the pack.
            repo.pack()
            self.assertEqual(expected_set, repo.chk_bytes.keys())
        finally:
            repo.unlock()
        # and reopening
        repo = repo.bzrdir.open_repository()
        repo.lock_read()
        try:
            self.assertEqual(expected_set, repo.chk_bytes.keys())
        finally:
            repo.unlock()

    def test_chk_bytes_are_fully_buffered(self):
        """The chk_bytes index uses a plain dict as its leaf node cache."""
        repo = self.make_repository('.')
        repo.lock_write()
        self.addCleanup(repo.unlock)
        repo.start_write_group()
        try:
            sha1, length, _ = repo.chk_bytes.add_lines((None,),
                None, ["foo\n", "bar\n"], random_id=True)
            self.assertEqual('4e48e2c9a3d2ca8a708cb0cc545700544efb5021',
                sha1)
            self.assertEqual(
                set([('sha1:4e48e2c9a3d2ca8a708cb0cc545700544efb5021',)]),
                repo.chk_bytes.keys())
        except:
            repo.abort_write_group()
            raise
        else:
            repo.commit_write_group()
        # This may not always be correct if we change away from BTreeGraphIndex
        # in the future. But for now, lets check that chk_bytes are fully
        # buffered
        index = repo.chk_bytes._index._graph_index._indices[0]
        self.assertIsInstance(index, btree_index.BTreeGraphIndex)
        self.assertIs(type(index._leaf_node_cache), dict)
        # Re-opening the repository should also have a repo with everything
        # fully buffered
        repo2 = repository.Repository.open(self.get_url())
        repo2.lock_read()
        self.addCleanup(repo2.unlock)
        index = repo2.chk_bytes._index._graph_index._indices[0]
        self.assertIsInstance(index, btree_index.BTreeGraphIndex)
        self.assertIs(type(index._leaf_node_cache), dict)


class TestCommitWriteGroupIntegrityCheck(TestCaseWithRepositoryCHK):
    """Tests that commit_write_group prevents various kinds of invalid data
    from being committed to a CHK repository.
    """

    def reopen_repo_and_resume_write_group(self, repo):
        """Suspend repo's write group and resume it in a fresh instance.

        :param repo: a write-locked repository with an open write group.
        :return: a newly opened, write-locked repository with the write
            group resumed; its unlock is registered as a cleanup.
        """
        resume_tokens = repo.suspend_write_group()
        repo.unlock()
        reopened_repo = repo.bzrdir.open_repository()
        reopened_repo.lock_write()
        self.addCleanup(reopened_repo.unlock)
        reopened_repo.resume_write_group(resume_tokens)
        return reopened_repo

    def test_missing_chk_root_for_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a new inventory is missing.
        """
        repo = self.make_repository('damaged-repo')
        builder = self.make_branch_builder('simple-branch')
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None)),
            ('add', ('file', 'file-id', 'file', 'content\n'))])
        b = builder.get_branch()
        b.lock_read()
        self.addCleanup(b.unlock)
        repo.lock_write()
        repo.start_write_group()
        # Now, add the objects manually
        text_keys = [('file-id', 'A-id'), ('root-id', 'A-id')]
        # Directly add the texts, inventory, and revision object for 'A-id' --
        # but don't add the chk_bytes.
        src_repo = b.repository
        repo.texts.insert_record_stream(src_repo.texts.get_record_stream(
            text_keys, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('A-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('A-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_root_for_unchanged_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a new inventory is missing, even if the parent inventory is present
        and has identical content (i.e. the same chk root).

        A stacked repository containing only a revision with an identical
        inventory to its parent will still have the chk root records for those
        inventories.

        (In principle the chk records are unnecessary in this case, but in
        practice bzr 2.0rc1 (at least) expects to find them.)
        """
        repo = self.make_repository('damaged-repo')
        # Make a branch where the last two revisions have identical
        # inventories.
        builder = self.make_branch_builder('simple-branch')
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None)),
            ('add', ('file', 'file-id', 'file', 'content\n'))])
        builder.build_snapshot('B-id', None, [])
        builder.build_snapshot('C-id', None, [])
        b = builder.get_branch()
        b.lock_read()
        self.addCleanup(b.unlock)
        # check our setup: B-id and C-id should have identical chk root keys.
        inv_b = b.repository.get_inventory('B-id')
        inv_c = b.repository.get_inventory('C-id')
        if not isinstance(repo, RemoteRepository):
            # Remote repositories always return plain inventories
            self.assertEqual(inv_b.id_to_entry.key(), inv_c.id_to_entry.key())
        # Now, manually insert objects for a stacked repo with only revision
        # C-id:
        # We need ('revisions', 'C-id'), ('inventories', 'C-id'),
        # ('inventories', 'B-id'), and the corresponding chk roots for those
        # inventories.
        repo.lock_write()
        repo.start_write_group()
        src_repo = b.repository
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_leaf_for_inventory(self):
        """commit_write_group fails with BzrCheckError when a non-root chk
        record referenced by the inserted inventories is missing.
        """
        repo = self.make_repository('damaged-repo')
        if isinstance(repo, RemoteRepository):
            raise TestNotApplicable(
                "Unable to obtain CHKInventory from remote repo")
        b = self.make_branch_with_multiple_chk_nodes()
        src_repo = b.repository
        src_repo.lock_read()
        self.addCleanup(src_repo.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* drop the non-root chk records.
        inv_b = src_repo.get_inventory('B-id')
        inv_c = src_repo.get_inventory('C-id')
        chk_root_keys_only = [
            inv_b.id_to_entry.key(), inv_b.parent_id_basename_to_file_id.key(),
            inv_c.id_to_entry.key(), inv_c.parent_id_basename_to_file_id.key()]
        all_chks = src_repo.chk_bytes.keys()
        # Pick a non-root key to drop
        key_to_drop = all_chks.difference(chk_root_keys_only).pop()
        all_chks.discard(key_to_drop)
        repo.lock_write()
        repo.start_write_group()
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                all_chks, 'unordered', True))
        repo.texts.insert_record_stream(
            src_repo.texts.get_record_stream(
                src_repo.texts.keys(), 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def test_missing_chk_root_for_parent_inventory(self):
        """commit_write_group fails with BzrCheckError when the chk root record
        for a parent inventory of a new revision is missing.
        """
        repo = self.make_repository('damaged-repo')
        if isinstance(repo, RemoteRepository):
            raise TestNotApplicable(
                "Unable to obtain CHKInventory from remote repo")
        b = self.make_branch_with_multiple_chk_nodes()
        b.lock_read()
        self.addCleanup(b.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* the chk root entry for the parent inventory.
        # We need ('revisions', 'C-id'), ('inventories', 'C-id'),
        # ('inventories', 'B-id'), and the corresponding chk roots for those
        # inventories.
        inv_c = b.repository.get_inventory('C-id')
        chk_keys_for_c_only = [
            inv_c.id_to_entry.key(), inv_c.parent_id_basename_to_file_id.key()]
        repo.lock_write()
        repo.start_write_group()
        src_repo = b.repository
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                chk_keys_for_c_only, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()

    def make_branch_with_multiple_chk_nodes(self):
        """Build a three-revision branch ('A-id', 'B-id', 'C-id').

        'A-id' adds the root and three files, 'B-id' changes nothing and
        'C-id' modifies all three files.

        :return: the branch produced by the branch builder.
        """
        # add and modify files with very long file-ids, so that the chk map
        # will need more than just a root node.
        builder = self.make_branch_builder('simple-branch')
        file_adds = []
        file_modifies = []
        for char in 'abc':
            name = char * 10000
            file_adds.append(
                ('add', ('file-' + name, 'file-%s-id' % name, 'file',
                         'content %s\n' % name)))
            file_modifies.append(
                ('modify', ('file-%s-id' % name, 'new content %s\n' % name)))
        builder.build_snapshot('A-id', None, [
            ('add', ('', 'root-id', 'directory', None))] +
            file_adds)
        builder.build_snapshot('B-id', None, [])
        builder.build_snapshot('C-id', None, file_modifies)
        return builder.get_branch()

    def test_missing_text_record(self):
        """commit_write_group fails with BzrCheckError when a text is missing.
        """
        repo = self.make_repository('damaged-repo')
        b = self.make_branch_with_multiple_chk_nodes()
        src_repo = b.repository
        src_repo.lock_read()
        self.addCleanup(src_repo.unlock)
        # Now, manually insert objects for a stacked repo with only revision
        # C-id, *except* drop one changed text.
        all_texts = src_repo.texts.keys()
        all_texts.remove(('file-%s-id' % ('c'*10000,), 'C-id'))
        repo.lock_write()
        repo.start_write_group()
        repo.chk_bytes.insert_record_stream(
            src_repo.chk_bytes.get_record_stream(
                src_repo.chk_bytes.keys(), 'unordered', True))
        repo.texts.insert_record_stream(
            src_repo.texts.get_record_stream(
                all_texts, 'unordered', True))
        repo.inventories.insert_record_stream(
            src_repo.inventories.get_record_stream(
                [('B-id',), ('C-id',)], 'unordered', True))
        repo.revisions.insert_record_stream(
            src_repo.revisions.get_record_stream(
                [('C-id',)], 'unordered', True))
        # Make sure the presence of the missing data in a fallback does not
        # avoid the error.
        repo.add_fallback_repository(b.repository)
        self.assertRaises(errors.BzrCheckError, repo.commit_write_group)
        reopened_repo = self.reopen_repo_and_resume_write_group(repo)
        self.assertRaises(
            errors.BzrCheckError, reopened_repo.commit_write_group)
        reopened_repo.abort_write_group()