111
79
if not element or _whitespace_re.search(element) is not None:
112
80
raise errors.BadIndexKey(element)
114
def _external_references(self):
115
"""Return references that are not present in this index.
119
# TODO: JAM 2008-11-21 This makes an assumption about how the reference
120
# lists are used. It is currently correct for pack-0.92 through
121
# 1.9, which use the node references (3rd column) second
122
# reference list as the compression parent. Perhaps this should
123
# be moved into something higher up the stack, since it
124
# makes assumptions about how the index is used.
125
if self.reference_lists > 1:
126
for node in self.iter_all_entries():
128
refs.update(node[3][1])
131
# If reference_lists == 0 there can be no external references, and
132
# if reference_lists == 1, then there isn't a place to store the
136
def _get_nodes_by_key(self):
137
if self._nodes_by_key is None:
139
if self.reference_lists:
140
for key, (absent, references, value) in self._nodes.iteritems():
143
key_dict = nodes_by_key
144
for subkey in key[:-1]:
145
key_dict = key_dict.setdefault(subkey, {})
146
key_dict[key[-1]] = key, value, references
148
for key, (absent, references, value) in self._nodes.iteritems():
151
key_dict = nodes_by_key
152
for subkey in key[:-1]:
153
key_dict = key_dict.setdefault(subkey, {})
154
key_dict[key[-1]] = key, value
155
self._nodes_by_key = nodes_by_key
156
return self._nodes_by_key
158
def _update_nodes_by_key(self, key, value, node_refs):
159
"""Update the _nodes_by_key dict with a new key.
161
For a key of (foo, bar, baz) create
162
_nodes_by_key[foo][bar][baz] = key_value
164
if self._nodes_by_key is None:
166
key_dict = self._nodes_by_key
167
if self.reference_lists:
168
key_value = key, value, node_refs
170
key_value = key, value
171
for subkey in key[:-1]:
172
key_dict = key_dict.setdefault(subkey, {})
173
key_dict[key[-1]] = key_value
175
def _check_key_ref_value(self, key, references, value):
176
"""Check that 'key' and 'references' are all valid.
178
:param key: A key tuple. Must conform to the key interface (be a tuple,
179
be of the right length, not have any whitespace or nulls in any key
181
:param references: An iterable of reference lists. Something like
182
[[(ref, key)], [(ref, key), (other, key)]]
183
:param value: The value associate with this key. Must not contain
184
newlines or null characters.
185
:return: (node_refs, absent_references)
186
node_refs basically a packed form of 'references' where all
188
absent_references reference keys that are not in self._nodes.
189
This may contain duplicates if the same key is
190
referenced in multiple lists.
82
def add_node(self, key, value, references=()):
83
"""Add a node to the index.
85
:param key: The key. keys are non-empty tuples containing
86
as many whitespace-free utf8 bytestrings as the key length
87
defined for this index.
88
:param references: An iterable of iterables of keys. Each is a
89
reference to another key.
90
:param value: The value to associate with the key. It may be any
91
bytes as long as it does not contain \0 or \n.
192
93
self._check_key(key)
193
94
if _newline_null_re.search(value) is not None:
353
223
suitable for production use. :XXX
356
def __init__(self, transport, name, size):
226
def __init__(self, transport, name):
357
227
"""Open an index called name on transport.
359
229
:param transport: A bzrlib.transport.Transport.
360
230
:param name: A path to provide to transport API calls.
361
:param size: The size of the index in bytes. This is used for bisection
362
logic to perform partial index reads. While the size could be
363
obtained by statting the file this introduced an additional round
364
trip as well as requiring stat'able transports, both of which are
365
avoided by having it supplied. If size is None, then bisection
366
support will be disabled and accessing the index will just stream
369
232
self._transport = transport
370
233
self._name = name
371
# Becomes a dict of key:(value, reference-list-byte-locations) used by
372
# the bisection interface to store parsed but not resolved keys.
373
self._bisect_nodes = None
374
# Becomes a dict of key:(value, reference-list-keys) which are ready to
375
# be returned directly to callers.
376
234
self._nodes = None
377
# a sorted list of slice-addresses for the parsed bytes of the file.
378
# e.g. (0,1) would mean that byte 0 is parsed.
379
self._parsed_byte_map = []
380
# a sorted list of keys matching each slice address for parsed bytes
381
# e.g. (None, 'foo@bar') would mean that the first byte contained no
382
# key, and the end byte of the slice is the of the data for 'foo@bar'
383
self._parsed_key_map = []
384
self._key_count = None
385
235
self._keys_by_offset = None
386
236
self._nodes_by_key = None
388
# The number of bytes we've read so far in trying to process this file
391
def __eq__(self, other):
392
"""Equal when self and other were created with the same parameters."""
394
type(self) == type(other) and
395
self._transport == other._transport and
396
self._name == other._name and
397
self._size == other._size)
399
def __ne__(self, other):
400
return not self.__eq__(other)
403
return "%s(%r)" % (self.__class__.__name__,
404
self._transport.abspath(self._name))
406
def _buffer_all(self, stream=None):
238
def _buffer_all(self):
407
239
"""Buffer all the index data.
409
241
Mutates self._nodes and self.keys_by_offset.
411
if self._nodes is not None:
412
# We already did this
414
if 'index' in debug.debug_flags:
415
mutter('Reading entire index %s', self._transport.abspath(self._name))
417
stream = self._transport.get(self._name)
243
stream = self._transport.get(self._name)
418
244
self._read_prefix(stream)
419
self._expected_elements = 3 + self._key_length
245
expected_elements = 3 + self._key_length
421
247
# raw data keyed by offset
422
248
self._keys_by_offset = {}
423
249
# ready-to-return key:value or key:value, node_ref_lists
425
self._nodes_by_key = None
251
self._nodes_by_key = {}
427
253
pos = stream.tell()
428
lines = stream.read().split('\n')
430
_, _, _, trailers = self._parse_lines(lines, pos)
254
for line in stream.readlines():
258
elements = line.split('\0')
259
if len(elements) != expected_elements:
260
raise errors.BadIndexData(self)
262
key = tuple(elements[:self._key_length])
263
absent, references, value = elements[-3:]
264
value = value[:-1] # remove the newline
266
for ref_string in references.split('\t'):
267
ref_lists.append(tuple([
268
int(ref) for ref in ref_string.split('\r') if ref
270
ref_lists = tuple(ref_lists)
271
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
431
273
for key, absent, references, value in self._keys_by_offset.itervalues():
434
276
# resolve references:
435
277
if self.node_ref_lists:
436
node_value = (value, self._resolve_references(references))
279
for ref_list in references:
280
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
281
node_value = (value, tuple(node_refs))
438
283
node_value = value
439
284
self._nodes[key] = node_value
440
# cache the keys for quick set intersections
285
if self._key_length > 1:
286
subkey = list(reversed(key[:-1]))
287
key_dict = self._nodes_by_key
288
if self.node_ref_lists:
289
key_value = key, node_value[0], node_value[1]
291
key_value = key, node_value
292
# possibly should do this on-demand, but it seems likely it is
294
# For a key of (foo, bar, baz) create
295
# _nodes_by_key[foo][bar][baz] = key_value
296
for subkey in key[:-1]:
297
key_dict = key_dict.setdefault(subkey, {})
298
key_dict[key[-1]] = key_value
441
299
self._keys = set(self._nodes)
442
300
if trailers != 1:
443
301
# there must be one line - the empty trailer line.
444
302
raise errors.BadIndexData(self)
446
def external_references(self, ref_list_num):
447
"""Return references that are not present in this index.
450
if ref_list_num + 1 > self.node_ref_lists:
451
raise ValueError('No ref list %d, index has %d ref lists'
452
% (ref_list_num, self.node_ref_lists))
454
for key, (value, ref_lists) in self._nodes.iteritems():
455
ref_list = ref_lists[ref_list_num]
456
refs.update(ref_list)
457
return refs - self._keys
459
def _get_nodes_by_key(self):
460
if self._nodes_by_key is None:
462
if self.node_ref_lists:
463
for key, (value, references) in self._nodes.iteritems():
464
key_dict = nodes_by_key
465
for subkey in key[:-1]:
466
key_dict = key_dict.setdefault(subkey, {})
467
key_dict[key[-1]] = key, value, references
469
for key, value in self._nodes.iteritems():
470
key_dict = nodes_by_key
471
for subkey in key[:-1]:
472
key_dict = key_dict.setdefault(subkey, {})
473
key_dict[key[-1]] = key, value
474
self._nodes_by_key = nodes_by_key
475
return self._nodes_by_key
477
304
def iter_all_entries(self):
478
305
"""Iterate over all keys within the index.
480
:return: An iterable of (index, key, value) or (index, key, value, reference_lists).
307
:return: An iterable of (key, value) or (key, value, reference_lists).
481
308
The former tuple is used when there are no reference lists in the
482
309
index, making the API compatible with simple key:value index types.
483
310
There is no defined order for the result iteration - it will be in
484
311
the most efficient order for the index.
486
if 'evil' in debug.debug_flags:
487
trace.mutter_callsite(3,
488
"iter_all_entries scales with size of history.")
489
313
if self._nodes is None:
490
314
self._buffer_all()
491
315
if self.node_ref_lists:
513
337
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
514
338
except ValueError:
515
339
raise errors.BadIndexOptions(self)
516
options_line = stream.readline()
517
if not options_line.startswith(_OPTION_LEN):
518
raise errors.BadIndexOptions(self)
520
self._key_count = int(options_line[len(_OPTION_LEN):-1])
522
raise errors.BadIndexOptions(self)
524
def _resolve_references(self, references):
525
"""Return the resolved key references for references.
527
References are resolved by looking up the location of the key in the
528
_keys_by_offset map and substituting the key name, preserving ordering.
530
:param references: An iterable of iterables of key locations. e.g.
532
:return: A tuple of tuples of keys.
535
for ref_list in references:
536
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
537
return tuple(node_refs)
539
def _find_index(self, range_map, key):
540
"""Helper for the _parsed_*_index calls.
542
Given a range map - [(start, end), ...], finds the index of the range
543
in the map for key if it is in the map, and if it is not there, the
544
immediately preceeding range in the map.
546
result = bisect_right(range_map, key) - 1
547
if result + 1 < len(range_map):
548
# check the border condition, it may be in result + 1
549
if range_map[result + 1][0] == key[0]:
553
def _parsed_byte_index(self, offset):
554
"""Return the index of the entry immediately before offset.
556
e.g. if the parsed map has regions 0,10 and 11,12 parsed, meaning that
557
there is one unparsed byte (the 11th, addressed as[10]). then:
558
asking for 0 will return 0
559
asking for 10 will return 0
560
asking for 11 will return 1
561
asking for 12 will return 1
564
return self._find_index(self._parsed_byte_map, key)
566
def _parsed_key_index(self, key):
567
"""Return the index of the entry immediately before key.
569
e.g. if the parsed map has regions (None, 'a') and ('b','c') parsed,
570
meaning that keys from None to 'a' inclusive, and 'b' to 'c' inclusive
571
have been parsed, then:
572
asking for '' will return 0
573
asking for 'a' will return 0
574
asking for 'b' will return 1
575
asking for 'e' will return 1
577
search_key = (key, None)
578
return self._find_index(self._parsed_key_map, search_key)
580
def _is_parsed(self, offset):
581
"""Returns True if offset has been parsed."""
582
index = self._parsed_byte_index(offset)
583
if index == len(self._parsed_byte_map):
584
return offset < self._parsed_byte_map[index - 1][1]
585
start, end = self._parsed_byte_map[index]
586
return offset >= start and offset < end
588
def _iter_entries_from_total_buffer(self, keys):
589
"""Iterate over keys when the entire index is parsed."""
341
def iter_entries(self, keys):
342
"""Iterate over keys within the index.
344
:param keys: An iterable providing the keys to be retrieved.
345
:return: An iterable as per iter_all_entries, but restricted to the
346
keys supplied. No additional keys will be returned, and every
347
key supplied that is in the index will be returned.
352
if self._nodes is None:
590
354
keys = keys.intersection(self._keys)
591
355
if self.node_ref_lists:
703
432
# the last thing looked up was a terminal element
704
433
yield (self, ) + key_dict
707
"""Return an estimate of the number of keys in this index.
709
For GraphIndex the estimate is exact.
711
if self._key_count is None:
712
self._read_and_parse([_HEADER_READV])
713
return self._key_count
715
def _lookup_keys_via_location(self, location_keys):
716
"""Public interface for implementing bisection.
718
If _buffer_all has been called, then all the data for the index is in
719
memory, and this method should not be called, as it uses a separate
720
cache because it cannot pre-resolve all indices, which buffer_all does
723
:param location_keys: A list of location(byte offset), key tuples.
724
:return: A list of (location_key, result) tuples as expected by
725
bzrlib.bisect_multi.bisect_multi_bytes.
727
# Possible improvements:
728
# - only bisect lookup each key once
729
# - sort the keys first, and use that to reduce the bisection window
731
# this progresses in three parts:
734
# attempt to answer the question from the now in memory data.
735
# build the readv request
736
# for each location, ask for 800 bytes - much more than rows we've seen
739
for location, key in location_keys:
740
# can we answer from cache?
741
if self._bisect_nodes and key in self._bisect_nodes:
742
# We have the key parsed.
744
index = self._parsed_key_index(key)
745
if (len(self._parsed_key_map) and
746
self._parsed_key_map[index][0] <= key and
747
(self._parsed_key_map[index][1] >= key or
748
# end of the file has been parsed
749
self._parsed_byte_map[index][1] == self._size)):
750
# the key has been parsed, so no lookup is needed even if its
753
# - if we have examined this part of the file already - yes
754
index = self._parsed_byte_index(location)
755
if (len(self._parsed_byte_map) and
756
self._parsed_byte_map[index][0] <= location and
757
self._parsed_byte_map[index][1] > location):
758
# the byte region has been parsed, so no read is needed.
761
if location + length > self._size:
762
length = self._size - location
763
# todo, trim out parsed locations.
765
readv_ranges.append((location, length))
766
# read the header if needed
767
if self._bisect_nodes is None:
768
readv_ranges.append(_HEADER_READV)
769
self._read_and_parse(readv_ranges)
771
if self._nodes is not None:
772
# _read_and_parse triggered a _buffer_all because we requested the
774
for location, key in location_keys:
775
if key not in self._nodes: # not present
776
result.append(((location, key), False))
777
elif self.node_ref_lists:
778
value, refs = self._nodes[key]
779
result.append(((location, key),
780
(self, key, value, refs)))
782
result.append(((location, key),
783
(self, key, self._nodes[key])))
786
# - figure out <, >, missing, present
787
# - result present references so we can return them.
788
# keys that we cannot answer until we resolve references
789
pending_references = []
790
pending_locations = set()
791
for location, key in location_keys:
792
# can we answer from cache?
793
if key in self._bisect_nodes:
794
# the key has been parsed, so no lookup is needed
795
if self.node_ref_lists:
796
# the references may not have been all parsed.
797
value, refs = self._bisect_nodes[key]
798
wanted_locations = []
799
for ref_list in refs:
801
if ref not in self._keys_by_offset:
802
wanted_locations.append(ref)
804
pending_locations.update(wanted_locations)
805
pending_references.append((location, key))
807
result.append(((location, key), (self, key,
808
value, self._resolve_references(refs))))
810
result.append(((location, key),
811
(self, key, self._bisect_nodes[key])))
814
# has the region the key should be in, been parsed?
815
index = self._parsed_key_index(key)
816
if (self._parsed_key_map[index][0] <= key and
817
(self._parsed_key_map[index][1] >= key or
818
# end of the file has been parsed
819
self._parsed_byte_map[index][1] == self._size)):
820
result.append(((location, key), False))
822
# no, is the key above or below the probed location:
823
# get the range of the probed & parsed location
824
index = self._parsed_byte_index(location)
825
# if the key is below the start of the range, its below
826
if key < self._parsed_key_map[index][0]:
830
result.append(((location, key), direction))
832
# lookup data to resolve references
833
for location in pending_locations:
835
if location + length > self._size:
836
length = self._size - location
837
# TODO: trim out parsed locations (e.g. if the 800 is into the
838
# parsed region trim it, and dont use the adjust_for_latency
841
readv_ranges.append((location, length))
842
self._read_and_parse(readv_ranges)
843
if self._nodes is not None:
844
# The _read_and_parse triggered a _buffer_all, grab the data and
846
for location, key in pending_references:
847
value, refs = self._nodes[key]
848
result.append(((location, key), (self, key, value, refs)))
850
for location, key in pending_references:
851
# answer key references we had to look-up-late.
852
value, refs = self._bisect_nodes[key]
853
result.append(((location, key), (self, key,
854
value, self._resolve_references(refs))))
857
def _parse_header_from_bytes(self, bytes):
858
"""Parse the header from a region of bytes.
860
:param bytes: The data to parse.
861
:return: An offset, data tuple such as readv yields, for the unparsed
862
data. (which may length 0).
864
signature = bytes[0:len(self._signature())]
865
if not signature == self._signature():
866
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
867
lines = bytes[len(self._signature()):].splitlines()
868
options_line = lines[0]
869
if not options_line.startswith(_OPTION_NODE_REFS):
870
raise errors.BadIndexOptions(self)
872
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
874
raise errors.BadIndexOptions(self)
875
options_line = lines[1]
876
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
877
raise errors.BadIndexOptions(self)
879
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
881
raise errors.BadIndexOptions(self)
882
options_line = lines[2]
883
if not options_line.startswith(_OPTION_LEN):
884
raise errors.BadIndexOptions(self)
886
self._key_count = int(options_line[len(_OPTION_LEN):])
888
raise errors.BadIndexOptions(self)
889
# calculate the bytes we have processed
890
header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
892
self._parsed_bytes(0, None, header_end, None)
893
# setup parsing state
894
self._expected_elements = 3 + self._key_length
895
# raw data keyed by offset
896
self._keys_by_offset = {}
897
# keys with the value and node references
898
self._bisect_nodes = {}
899
return header_end, bytes[header_end:]
901
def _parse_region(self, offset, data):
902
"""Parse node data returned from a readv operation.
904
:param offset: The byte offset the data starts at.
905
:param data: The data to parse.
909
end = offset + len(data)
912
# Trivial test - if the current index's end is within the
913
# low-matching parsed range, we're done.
914
index = self._parsed_byte_index(high_parsed)
915
if end < self._parsed_byte_map[index][1]:
917
# print "[%d:%d]" % (offset, end), \
918
# self._parsed_byte_map[index:index + 2]
919
high_parsed, last_segment = self._parse_segment(
920
offset, data, end, index)
924
def _parse_segment(self, offset, data, end, index):
925
"""Parse one segment of data.
927
:param offset: Where 'data' begins in the file.
928
:param data: Some data to parse a segment of.
929
:param end: Where data ends
930
:param index: The current index into the parsed bytes map.
931
:return: True if the parsed segment is the last possible one in the
933
:return: high_parsed_byte, last_segment.
934
high_parsed_byte is the location of the highest parsed byte in this
935
segment, last_segment is True if the parsed segment is the last
936
possible one in the data block.
938
# default is to use all data
940
# accomodate overlap with data before this.
941
if offset < self._parsed_byte_map[index][1]:
942
# overlaps the lower parsed region
943
# skip the parsed data
944
trim_start = self._parsed_byte_map[index][1] - offset
945
# don't trim the start for \n
946
start_adjacent = True
947
elif offset == self._parsed_byte_map[index][1]:
948
# abuts the lower parsed region
951
# do not trim anything
952
start_adjacent = True
954
# does not overlap the lower parsed region
957
# but trim the leading \n
958
start_adjacent = False
959
if end == self._size:
960
# lines up to the end of all data:
963
# do not strip to the last \n
966
elif index + 1 == len(self._parsed_byte_map):
967
# at the end of the parsed data
970
# but strip to the last \n
973
elif end == self._parsed_byte_map[index + 1][0]:
974
# buts up against the next parsed region
977
# do not strip to the last \n
980
elif end > self._parsed_byte_map[index + 1][0]:
981
# overlaps into the next parsed region
982
# only consider the unparsed data
983
trim_end = self._parsed_byte_map[index + 1][0] - offset
984
# do not strip to the last \n as we know its an entire record
986
last_segment = end < self._parsed_byte_map[index + 1][1]
988
# does not overlap into the next region
991
# but strip to the last \n
994
# now find bytes to discard if needed
995
if not start_adjacent:
996
# work around python bug in rfind
997
if trim_start is None:
998
trim_start = data.find('\n') + 1
1000
trim_start = data.find('\n', trim_start) + 1
1001
if not (trim_start != 0):
1002
raise AssertionError('no \n was present')
1003
# print 'removing start', offset, trim_start, repr(data[:trim_start])
1004
if not end_adjacent:
1005
# work around python bug in rfind
1006
if trim_end is None:
1007
trim_end = data.rfind('\n') + 1
1009
trim_end = data.rfind('\n', None, trim_end) + 1
1010
if not (trim_end != 0):
1011
raise AssertionError('no \n was present')
1012
# print 'removing end', offset, trim_end, repr(data[trim_end:])
1013
# adjust offset and data to the parseable data.
1014
trimmed_data = data[trim_start:trim_end]
1015
if not (trimmed_data):
1016
raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
1017
% (trim_start, trim_end, offset, offset + len(data)))
1019
offset += trim_start
1020
# print "parsing", repr(trimmed_data)
1021
# splitlines mangles the \r delimiters.. don't use it.
1022
lines = trimmed_data.split('\n')
1025
first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
1026
for key, value in nodes:
1027
self._bisect_nodes[key] = value
1028
self._parsed_bytes(offset, first_key,
1029
offset + len(trimmed_data), last_key)
1030
return offset + len(trimmed_data), last_segment
1032
def _parse_lines(self, lines, pos):
1039
# must be at the end
1041
if not (self._size == pos + 1):
1042
raise AssertionError("%s %s" % (self._size, pos))
1045
elements = line.split('\0')
1046
if len(elements) != self._expected_elements:
1047
raise errors.BadIndexData(self)
1048
# keys are tuples. Each element is a string that may occur many
1049
# times, so we intern them to save space. AB, RC, 200807
1050
key = tuple([intern(element) for element in elements[:self._key_length]])
1051
if first_key is None:
1053
absent, references, value = elements[-3:]
1055
for ref_string in references.split('\t'):
1056
ref_lists.append(tuple([
1057
int(ref) for ref in ref_string.split('\r') if ref
1059
ref_lists = tuple(ref_lists)
1060
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
1061
pos += len(line) + 1 # +1 for the \n
1064
if self.node_ref_lists:
1065
node_value = (value, ref_lists)
1068
nodes.append((key, node_value))
1069
# print "parsed ", key
1070
return first_key, key, nodes, trailers
1072
def _parsed_bytes(self, start, start_key, end, end_key):
1073
"""Mark the bytes from start to end as parsed.
1075
Calling self._parsed_bytes(1,2) will mark one byte (the one at offset
1078
:param start: The start of the parsed region.
1079
:param end: The end of the parsed region.
1081
index = self._parsed_byte_index(start)
1082
new_value = (start, end)
1083
new_key = (start_key, end_key)
1085
# first range parsed is always the beginning.
1086
self._parsed_byte_map.insert(index, new_value)
1087
self._parsed_key_map.insert(index, new_key)
1091
# extend lower region
1092
# extend higher region
1093
# combine two regions
1094
if (index + 1 < len(self._parsed_byte_map) and
1095
self._parsed_byte_map[index][1] == start and
1096
self._parsed_byte_map[index + 1][0] == end):
1097
# combine two regions
1098
self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
1099
self._parsed_byte_map[index + 1][1])
1100
self._parsed_key_map[index] = (self._parsed_key_map[index][0],
1101
self._parsed_key_map[index + 1][1])
1102
del self._parsed_byte_map[index + 1]
1103
del self._parsed_key_map[index + 1]
1104
elif self._parsed_byte_map[index][1] == start:
1105
# extend the lower entry
1106
self._parsed_byte_map[index] = (
1107
self._parsed_byte_map[index][0], end)
1108
self._parsed_key_map[index] = (
1109
self._parsed_key_map[index][0], end_key)
1110
elif (index + 1 < len(self._parsed_byte_map) and
1111
self._parsed_byte_map[index + 1][0] == end):
1112
# extend the higher entry
1113
self._parsed_byte_map[index + 1] = (
1114
start, self._parsed_byte_map[index + 1][1])
1115
self._parsed_key_map[index + 1] = (
1116
start_key, self._parsed_key_map[index + 1][1])
1119
self._parsed_byte_map.insert(index + 1, new_value)
1120
self._parsed_key_map.insert(index + 1, new_key)
1122
def _read_and_parse(self, readv_ranges):
1123
"""Read the the ranges and parse the resulting data.
1125
:param readv_ranges: A prepared readv range list.
1127
if not readv_ranges:
1129
if self._nodes is None and self._bytes_read * 2 >= self._size:
1130
# We've already read more than 50% of the file and we are about to
1131
# request more data, just _buffer_all() and be done
1135
readv_data = self._transport.readv(self._name, readv_ranges, True,
1138
for offset, data in readv_data:
1139
self._bytes_read += len(data)
1140
if offset == 0 and len(data) == self._size:
1141
# We read the whole range, most likely because the
1142
# Transport upcast our readv ranges into one long request
1143
# for enough total data to grab the whole index.
1144
self._buffer_all(StringIO(data))
1146
if self._bisect_nodes is None:
1147
# this must be the start
1148
if not (offset == 0):
1149
raise AssertionError()
1150
offset, data = self._parse_header_from_bytes(data)
1151
# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))
1152
self._parse_region(offset, data)
1154
435
def _signature(self):
1155
436
"""The file signature for this index type."""
1156
437
return _SIGNATURE