1
# Copyright (C) 2007-2010 Canonical Ltd
1
# Copyright (C) 2007-2011 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
32
34
from bzrlib.lazy_import import lazy_import
33
35
lazy_import(globals(), """
34
from bzrlib import trace
35
from bzrlib.bisect_multi import bisect_multi_bytes
36
from bzrlib.revision import NULL_REVISION
37
from bzrlib.trace import mutter
38
revision as _mod_revision,
39
42
from bzrlib import (
69
72
class GraphIndexBuilder(object):
70
73
"""A builder that can build a GraphIndex.
72
The resulting graph has the structure:
75
The resulting graph has the structure::
74
_SIGNATURE OPTIONS NODES NEWLINE
75
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
76
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
78
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
79
KEY := Not-whitespace-utf8
81
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
82
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
83
REFERENCE := DIGITS ; digits is the byte offset in the index of the
85
VALUE := no-newline-no-null-bytes
77
_SIGNATURE OPTIONS NODES NEWLINE
78
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
79
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
81
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
82
KEY := Not-whitespace-utf8
84
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
85
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
86
REFERENCE := DIGITS ; digits is the byte offset in the index of the
88
VALUE := no-newline-no-null-bytes
88
91
def __init__(self, reference_lists=0, key_elements=1):
184
187
:param value: The value associate with this key. Must not contain
185
188
newlines or null characters.
186
189
:return: (node_refs, absent_references)
187
node_refs basically a packed form of 'references' where all
189
absent_references reference keys that are not in self._nodes.
190
This may contain duplicates if the same key is
191
referenced in multiple lists.
191
* node_refs: basically a packed form of 'references' where all
193
* absent_references: reference keys that are not in self._nodes.
194
This may contain duplicates if the same key is referenced in
193
197
as_st = StaticTuple.from_sequence
194
198
self._check_key(key)
219
223
:param references: An iterable of iterables of keys. Each is a
220
224
reference to another key.
221
225
:param value: The value to associate with the key. It may be any
222
bytes as long as it does not contain \0 or \n.
226
bytes as long as it does not contain \\0 or \\n.
225
229
absent_references) = self._check_key_ref_value(key, references, value)
245
249
def finish(self):
252
:returns: cStringIO holding the full context of the index as it
253
should be written to disk.
246
255
lines = [_SIGNATURE]
247
256
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
248
257
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
444
453
# We already did this
446
455
if 'index' in debug.debug_flags:
447
mutter('Reading entire index %s', self._transport.abspath(self._name))
456
trace.mutter('Reading entire index %s',
457
self._transport.abspath(self._name))
448
458
if stream is None:
449
459
stream = self._transport.get(self._name)
450
460
if self._base_offset != 0:
463
473
pos = stream.tell()
464
474
lines = stream.read().split('\n')
475
# GZ 2009-09-20: Should really use a try/finally block to ensure close
467
478
_, _, _, trailers = self._parse_lines(lines, pos)
670
681
if self._nodes is not None:
671
682
return self._iter_entries_from_total_buffer(keys)
673
return (result[1] for result in bisect_multi_bytes(
684
return (result[1] for result in bisect_multi.bisect_multi_bytes(
674
685
self._lookup_keys_via_location, self._size, keys))
676
687
def iter_entries_prefix(self, keys):
1287
1298
def get_parent_map(self, keys):
1288
1299
"""See graph.StackedParentsProvider.get_parent_map"""
1289
1300
search_keys = set(keys)
1290
if NULL_REVISION in search_keys:
1291
search_keys.discard(NULL_REVISION)
1292
found_parents = {NULL_REVISION:[]}
1301
if _mod_revision.NULL_REVISION in search_keys:
1302
search_keys.discard(_mod_revision.NULL_REVISION)
1303
found_parents = {_mod_revision.NULL_REVISION:[]}
1294
1305
found_parents = {}
1295
1306
for index, key, value, refs in self.iter_entries(search_keys):
1296
1307
parents = refs[0]
1297
1308
if not parents:
1298
parents = (NULL_REVISION,)
1309
parents = (_mod_revision.NULL_REVISION,)
1299
1310
found_parents[key] = parents
1300
1311
return found_parents
1418
1429
_move_to_front propagates to all objects in self._sibling_indices by
1419
1430
calling _move_to_front_by_name.
1432
if self._indices[:len(hit_indices)] == hit_indices:
1433
# The 'hit_indices' are already at the front (and in the same
1434
# order), no need to re-order
1421
1436
hit_names = self._move_to_front_by_index(hit_indices)
1422
1437
for sibling_idx in self._sibling_indices:
1423
1438
sibling_idx._move_to_front_by_name(hit_names)
1430
1445
indices_info = zip(self._index_names, self._indices)
1431
1446
if 'index' in debug.debug_flags:
1432
mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1433
indices_info, hit_indices)
1434
hit_indices_info = []
1447
trace.mutter('CombinedGraphIndex reordering: currently %r, '
1448
'promoting %r', indices_info, hit_indices)
1436
unhit_indices_info = []
1437
for name, idx in indices_info:
1451
new_hit_indices = []
1454
for offset, (name, idx) in enumerate(indices_info):
1438
1455
if idx in hit_indices:
1439
info = hit_indices_info
1440
1456
hit_names.append(name)
1457
new_hit_indices.append(idx)
1458
if len(new_hit_indices) == len(hit_indices):
1459
# We've found all of the hit entries, everything else is
1461
unhit_names.extend(self._index_names[offset+1:])
1462
unhit_indices.extend(self._indices[offset+1:])
1442
info = unhit_indices_info
1443
info.append((name, idx))
1444
final_info = hit_indices_info + unhit_indices_info
1445
self._indices = [idx for (name, idx) in final_info]
1446
self._index_names = [name for (name, idx) in final_info]
1465
unhit_names.append(name)
1466
unhit_indices.append(idx)
1468
self._indices = new_hit_indices + unhit_indices
1469
self._index_names = hit_names + unhit_names
1447
1470
if 'index' in debug.debug_flags:
1448
mutter('CombinedGraphIndex reordered: %r', self._indices)
1471
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
1449
1472
return hit_names
1451
1474
def _move_to_front_by_name(self, hit_names):