1
# groupcompress, a bzr plugin providing improved disk utilisation
2
# Copyright (C) 2008 Canonical Limited.
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
"""Repository formats using B+Tree indices and groupcompress compression."""
23
from bzrlib import debug, errors, pack, repository
24
from bzrlib.index import GraphIndex, GraphIndexBuilder
25
from bzrlib.repository import InterPackRepo
26
from bzrlib.plugins.groupcompress.groupcompress import (
28
GroupCompressVersionedFiles,
30
from bzrlib.plugins.index2.btree_index import (
33
FixedMemoryGraphIndex,
35
from bzrlib.osutils import rand_chars
36
from bzrlib.repofmt.pack_repo import (
40
RepositoryPackCollection,
41
RepositoryFormatPackDevelopment0,
42
RepositoryFormatPackDevelopment0Subtree,
43
RepositoryFormatKnitPack1,
44
RepositoryFormatKnitPack3,
45
RepositoryFormatKnitPack4,
54
return self._pack_collection.pack_factory(self._pack_collection._upload_transport,
55
self._pack_collection._index_transport,
56
self._pack_collection._pack_transport, upload_suffix=self.suffix,
57
file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
60
# Rebind Packer.open_pack to this module's open_pack function, so every
# Packer creates its new pack through the pack collection's pack_factory.
Packer.open_pack = open_pack
63
class GCPack(NewPack):
65
def __init__(self, upload_transport, index_transport, pack_transport,
66
upload_suffix='', file_mode=None):
67
"""Create a GCPack instance.
69
:param upload_transport: A writable transport for the pack to be
70
incrementally uploaded to.
71
:param index_transport: A writable transport for the pack's indices to
72
be written to when the pack is finished.
73
:param pack_transport: A writable transport for the pack to be renamed
74
to when the upload is complete. This *must* be the same as
75
upload_transport.clone('../packs').
76
:param upload_suffix: An optional suffix to be given to any temporary
77
files created during the pack creation. e.g '.autopack'
78
:param file_mode: An optional file mode to create the new files with.
80
# The relative locations of the packs are constrained, but all are
81
# passed in because the caller has them, so as to avoid object churn.
83
# Revisions: parents list, no text compression.
84
BTreeBuilder(reference_lists=1),
85
# Inventory: compressed, with graph for compatibility with other
86
# existing bzrlib code.
87
BTreeBuilder(reference_lists=1),
88
# Texts: per file graph:
89
BTreeBuilder(reference_lists=1, key_elements=2),
90
# Signatures: Just blobs to store, no compression, no parents
92
BTreeBuilder(reference_lists=0),
94
# where should the new pack be opened
95
self.upload_transport = upload_transport
96
# where are indices written out to
97
self.index_transport = index_transport
98
# where is the pack renamed to when it is finished?
99
self.pack_transport = pack_transport
100
# What file mode to upload the pack and indices with.
101
self._file_mode = file_mode
102
# tracks the content written to the .pack file.
103
self._hash = md5.new()
104
# a four-tuple with the length in bytes of the indices, once the pack
105
# is finalised. (rev, inv, text, sigs)
106
self.index_sizes = None
107
# How much data to cache when writing packs. Note that this is not
108
# synchronised with reads, because it's not in the transport layer, so
109
# is not safe unless the client knows it won't be reading from the pack
111
self._cache_limit = 0
112
# the temporary pack file name.
113
self.random_name = rand_chars(20) + upload_suffix
114
# when was this pack started ?
115
self.start_time = time.time()
116
# open an output stream for the data added to the pack.
117
self.write_stream = self.upload_transport.open_write_stream(
118
self.random_name, mode=self._file_mode)
119
if 'pack' in debug.debug_flags:
120
mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
121
time.ctime(), self.upload_transport.base, self.random_name,
122
time.time() - self.start_time)
123
# A list of byte sequences to be written to the new pack, and the
124
# aggregate size of them. Stored as a list rather than separate
125
# variables so that the _write_data closure below can update them.
126
self._buffer = [[], 0]
127
# create a callable for adding data
129
# robertc says- this is a closure rather than a method on the object
130
# so that the variables are locals, and faster than accessing object
132
def _write_data(bytes, flush=False, _buffer=self._buffer,
133
_write=self.write_stream.write, _update=self._hash.update):
134
_buffer[0].append(bytes)
135
_buffer[1] += len(bytes)
137
if _buffer[1] > self._cache_limit or flush:
138
bytes = ''.join(_buffer[0])
142
# expose this on self, for the occasion when clients want to add data.
143
self._write_data = _write_data
144
# a pack writer object to serialise pack records.
145
self._writer = pack.ContainerWriter(self._write_data)
147
# what state is the pack in? (open, finished, aborted)
150
def _replace_index_with_readonly(self, index_type):
    """Rebind ``self.<index_type>_index`` to a BTreeGraphIndex.

    The new index is opened on self.index_transport, using the file name
    from index_name() and the size recorded in self.index_sizes.

    :param index_type: Prefix of the ``*_index`` attribute to replace; it
        is also handed to index_name() and index_offset().
    """
    attribute = index_type + '_index'
    transport = self.index_transport
    file_name = self.index_name(index_type, self.name)
    # index_sizes is indexed by the per-type offset, not by the type name.
    size = self.index_sizes[self.index_offset(index_type)]
    setattr(self, attribute, BTreeGraphIndex(transport, file_name, size))
157
# Set pack_factory on the stock RepositoryPackCollection to NewPack;
# GCRepositoryPackCollection overrides the attribute with GCPack.
RepositoryPackCollection.pack_factory = NewPack
159
class GCRepositoryPackCollection(RepositoryPackCollection):
161
pack_factory = GCPack
163
def _make_index(self, name, suffix):
    """Return a BTreeGraphIndex for the index file ``name + suffix``.

    Overrides the base implementation so that BTreeGraphIndex objects
    are used.

    :param name: Key into self._names identifying the pack.
    :param suffix: Index file suffix; also the key into
        self._suffix_offsets selecting which recorded size to use.
    """
    offset = self._suffix_offsets[suffix]
    file_name = name + suffix
    # self._names[name] holds one size per index kind, selected by offset.
    return BTreeGraphIndex(
        self._index_transport, file_name, self._names[name][offset])
171
def _start_write_group(self):
172
# Do not permit preparation for writing if we're not in a 'write lock'.
173
if not self.repo.is_write_locked():
174
raise errors.NotWriteLocked(self)
175
self._new_pack = self.pack_factory(self._upload_transport, self._index_transport,
176
self._pack_transport, upload_suffix='.pack',
177
file_mode=self.repo.bzrdir._get_file_mode())
178
# allow writing: queue writes to a new index
179
self.revision_index.add_writable_index(self._new_pack.revision_index,
181
self.inventory_index.add_writable_index(self._new_pack.inventory_index,
183
self.text_index.add_writable_index(self._new_pack.text_index,
185
self.signature_index.add_writable_index(self._new_pack.signature_index,
188
self.repo.inventories._index._add_callback = self.inventory_index.add_callback
189
self.repo.revisions._index._add_callback = self.revision_index.add_callback
190
self.repo.signatures._index._add_callback = self.signature_index.add_callback
191
self.repo.texts._index._add_callback = self.text_index.add_callback
195
class GCPackRepository(KnitPackRepository):
196
"""GC customisation of KnitPackRepository."""
198
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
200
"""Overridden to change pack collection class."""
201
KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
202
_commit_builder_class, _serializer)
203
# and now replace everything it did :)
204
index_transport = self._transport.clone('indices')
205
self._pack_collection = GCRepositoryPackCollection(self,
206
self._transport, index_transport,
207
self._transport.clone('upload'),
208
self._transport.clone('packs'))
209
self.inventories = GroupCompressVersionedFiles(
210
_GCGraphIndex(self._pack_collection.inventory_index.combined_index,
211
add_callback=self._pack_collection.inventory_index.add_callback,
212
parents=True, is_locked=self.is_locked),
213
access=self._pack_collection.inventory_index.data_access)
214
self.revisions = GroupCompressVersionedFiles(
215
_GCGraphIndex(self._pack_collection.revision_index.combined_index,
216
add_callback=self._pack_collection.revision_index.add_callback,
217
parents=True, is_locked=self.is_locked),
218
access=self._pack_collection.revision_index.data_access,
220
self.signatures = GroupCompressVersionedFiles(
221
_GCGraphIndex(self._pack_collection.signature_index.combined_index,
222
add_callback=self._pack_collection.signature_index.add_callback,
223
parents=False, is_locked=self.is_locked),
224
access=self._pack_collection.signature_index.data_access,
226
self.texts = GroupCompressVersionedFiles(
227
_GCGraphIndex(self._pack_collection.text_index.combined_index,
228
add_callback=self._pack_collection.text_index.add_callback,
229
parents=True, is_locked=self.is_locked),
230
access=self._pack_collection.text_index.data_access)
231
# True when the repository object is 'write locked' (as opposed to the
232
# physical lock only taken out around changes to the pack-names list.)
233
# Another way to represent this would be a decorator around the control
234
# files object that presents logical locks as physical ones - if this
235
# gets ugly consider that alternative design. RBC 20071011
236
self._write_lock_count = 0
237
self._transaction = None
239
self._reconcile_does_inventory_gc = True
240
self._reconcile_fixes_text_parents = True
241
self._reconcile_backsup_inventory = False
244
class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment0):
    """Pack repository format using B+Tree indices and groupcompress.

    Derives from RepositoryFormatPackDevelopment0 and sets
    repository_class to GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = (
            "Bazaar development format - btree+gc (needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = (
            "Development repository format - btree+groupcompress , "
            "interoperates with pack-0.92\n")
        return description
260
class RepositoryFormatPackGCRichRoot(RepositoryFormatKnitPack4):
    """Rich-root pack repository format using B+Tree indices and groupcompress.

    Derives from RepositoryFormatKnitPack4 and sets repository_class to
    GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = (
            "Bazaar development format - btree+gc-rich-root "
            "(needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = (
            "Development repository format - btree+groupcompress , "
            "interoperates with rich-root-pack\n")
        return description
276
class RepositoryFormatPackGCSubtrees(RepositoryFormatPackDevelopment0Subtree):
    """Subtree pack repository format using B+Tree indices and groupcompress.

    Derives from RepositoryFormatPackDevelopment0Subtree and sets
    repository_class to GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        format_string = (
            "Bazaar development format - btree+gc-subtrees "
            "(needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        description = (
            "Development repository format - btree+groupcompress , "
            "interoperates with pack-0.92-subtrees\n")
        return description
292
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Report groupcompress repositories as incompatible with InterPackRepo.

    :param source: Repository being read from; its ``_format`` is checked.
    :param target: Repository being written to; its ``_format`` is checked.
    :param orig_method: Compatibility check to defer to when neither
        repository uses one of this module's formats. Defaults to the
        InterPackRepo.is_compatible that was in effect when this function
        was defined.
    :return: False if either repository uses a groupcompress format,
        otherwise the result of ``orig_method(source, target)``.
    """
    formats = (RepositoryFormatPackGCPlain, RepositoryFormatPackGCRichRoot,
               RepositoryFormatPackGCSubtrees)
    # Both sides must be tested through their *format* object: checking the
    # target repository itself against the format classes never matches, so
    # a groupcompress target would otherwise slip through to orig_method.
    if (isinstance(source._format, formats)
            or isinstance(target._format, formats)):
        return False
    return orig_method(source, target)
301
# Install pack_incompatible in place of InterPackRepo.is_compatible. The
# previous implementation stays reachable as pack_incompatible's
# orig_method default, which was bound when the function was defined.
InterPackRepo.is_compatible = staticmethod(pack_incompatible)