457
462
if self._id_index:
458
463
self._id_index.setdefault(entry_key[2], set()).add(entry_key)
460
def _bisect(self, paths):
465
def _bisect(self, dir_name_list):
461
466
"""Bisect through the disk structure for specific rows.
463
:param paths: A list of paths to find
464
:return: A dict mapping path => entries for found entries. Missing
468
:param dir_name_list: A list of (dir, name) pairs.
469
:return: A dict mapping (dir, name) => entry for found entries. Missing
465
470
entries will not be in the map.
466
The list is not sorted, and entries will be populated
467
based on when they were read.
469
472
self._requires_lock()
470
473
# We need the file pointer to be right after the initial header block
599
596
# Either we will find them here, or we can mark them as
602
if middle_files[0] == first_path:
599
if middle_files[0] == first_dir_name:
603
600
# We might need to go before this location
604
pre.append(first_path)
605
if middle_files[-1] == last_path:
606
post.insert(0, last_path)
601
pre.append(first_dir_name)
602
if middle_files[-1] == last_dir_name:
603
post.insert(0, last_dir_name)
608
605
# Find out what paths we have
609
paths = {first_path:[first_fields]}
610
# last_path might == first_path so we need to be
606
paths = {first_dir_name:[first_fields]}
607
# last_dir_name might == first_dir_name so we need to be
611
608
# careful if we should append rather than overwrite
612
609
if last_entry_num != first_entry_num:
613
paths.setdefault(last_path, []).append(last_fields)
610
paths.setdefault(last_dir_name, []).append(last_fields)
614
611
for num in xrange(first_entry_num+1, last_entry_num):
615
612
# TODO: jam 20070223 We are already splitting here, so
616
613
# shouldn't we just split the whole thing rather
617
614
# than doing the split again in add_one_record?
618
615
fields = entries[num].split('\0')
620
path = fields[1] + '/' + fields[2]
623
paths.setdefault(path, []).append(fields)
616
dir_name = (fields[1], fields[2])
617
paths.setdefault(dir_name, []).append(fields)
625
for path in middle_files:
626
for fields in paths.get(path, []):
619
for dir_name in middle_files:
620
for fields in paths.get(dir_name, []):
627
621
# offset by 1 because of the opening '\0'
628
622
# consider changing fields_to_entry to avoid the
629
623
# extra list slice
630
624
entry = fields_to_entry(fields[1:])
631
found.setdefault(path, []).append(entry)
625
found.setdefault(dir_name, []).append(entry)
633
627
# Now we have split up everything into pre, middle, and post, and
634
628
# we have handled everything that fell in 'middle'.
873
867
if dir_name[0] in pending_dirs:
874
868
# This entry will be found in the dir search
870
# TODO: We need to check if this entry has
871
# already been found. Otherwise we might be
872
# hitting infinite recursion.
876
873
if dir_name not in found_dir_names:
877
paths_to_search.add(tree_info[1])
874
paths_to_search.add(dir_name)
878
875
# Now we have a list of paths to look for directly, and
879
876
# directory blocks that need to be read.
880
877
# newly_found is mixing the keys between (dir, name) and path
1554
1551
self._read_header_if_needed()
1555
1552
if self._dirblock_state == DirState.NOT_IN_MEMORY:
1556
_read_dirblocks(self)
1553
# move the _state_file pointer to after the header (in case bisect
1554
# has been called in the mean time)
1555
self._state_file.seek(self._end_of_header)
1556
text = self._state_file.read()
1557
# TODO: check the crc checksums. crc_measured = zlib.crc32(text)
1559
fields = text.split('\0')
1560
# Remove the last blank entry
1561
trailing = fields.pop()
1562
assert trailing == ''
1563
# consider turning fields into a tuple.
1565
# skip the first field which is the trailing null from the header.
1567
# Each line now has an extra '\n' field which is not used
1568
# so we just skip over it
1570
# 3 fields for the key
1571
# + number of fields per tree_data (5) * tree count
1573
num_present_parents = self._num_present_parents()
1574
tree_count = 1 + num_present_parents
1575
entry_size = self._fields_per_entry()
1576
expected_field_count = entry_size * self._num_entries
1577
field_count = len(fields)
1578
# this checks our adjustment, and also catches file too short.
1579
assert field_count - cur == expected_field_count, \
1580
'field count incorrect %s != %s, entry_size=%s, '\
1581
'num_entries=%s fields=%r' % (
1582
field_count - cur, expected_field_count, entry_size,
1583
self._num_entries, fields)
1585
if num_present_parents == 1:
1586
# Bind external functions to local names
1588
# We access all fields in order, so we can just iterate over
1589
# them. Grab an straight iterator over the fields. (We use an
1590
# iterator because we don't want to do a lot of additions, nor
1591
# do we want to do a lot of slicing)
1592
next = iter(fields).next
1593
# Move the iterator to the current position
1594
for x in xrange(cur):
1596
# The two blocks here are deliberate: the root block and the
1597
# contents-of-root block.
1598
self._dirblocks = [('', []), ('', [])]
1599
current_block = self._dirblocks[0][1]
1600
current_dirname = ''
1601
append_entry = current_block.append
1602
for count in xrange(self._num_entries):
1606
if dirname != current_dirname:
1607
# new block - different dirname
1609
current_dirname = dirname
1610
self._dirblocks.append((current_dirname, current_block))
1611
append_entry = current_block.append
1612
# we know current_dirname == dirname, so re-use it to avoid
1613
# creating new strings
1614
entry = ((current_dirname, name, file_id),
1617
next(), # fingerprint
1618
_int(next()), # size
1619
next() == 'y', # executable
1620
next(), # packed_stat or revision_id
1624
next(), # fingerprint
1625
_int(next()), # size
1626
next() == 'y', # executable
1627
next(), # packed_stat or revision_id
1631
assert trailing == '\n'
1632
# append the entry to the current block
1634
self._split_root_dirblock_into_contents()
1636
fields_to_entry = self._get_fields_to_entry()
1637
entries = [fields_to_entry(fields[pos:pos+entry_size])
1638
for pos in xrange(cur, field_count, entry_size)]
1639
self._entries_to_current_state(entries)
1640
# To convert from format 2 => format 3
1641
# self._dirblocks = sorted(self._dirblocks,
1642
# key=lambda blk:blk[0].split('/'))
1643
# To convert from format 3 => format 2
1644
# self._dirblocks = sorted(self._dirblocks)
1645
self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
1558
1647
def _read_header(self):
1559
1648
"""This reads in the metadata header, and the parent ids.
2283
2372
raise errors.ObjectNotLocked(self)
2286
# Try to load the compiled form if possible
2288
from bzrlib._dirstate_helpers_c import (
2289
_read_dirblocks_c as _read_dirblocks,
2290
bisect_dirblock_c as bisect_dirblock,
2291
_bisect_path_left_c as _bisect_path_left,
2292
_bisect_path_right_c as _bisect_path_right,
2293
cmp_by_dirs_c as cmp_by_dirs,
2296
from bzrlib._dirstate_helpers_py import (
2297
_read_dirblocks_py as _read_dirblocks,
2298
bisect_dirblock_py as bisect_dirblock,
2299
_bisect_path_left_py as _bisect_path_left,
2300
_bisect_path_right_py as _bisect_path_right,
2301
cmp_by_dirs_py as cmp_by_dirs,
2375
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
2376
"""Return the index where to insert dirname into the dirblocks.
2378
The return value idx is such that all directories blocks in dirblock[:idx]
2379
have names < dirname, and all blocks in dirblock[idx:] have names >=
2382
Optional args lo (default 0) and hi (default len(dirblocks)) bound the
2383
slice of a to be searched.
2388
dirname_split = cache[dirname]
2390
dirname_split = dirname.split('/')
2391
cache[dirname] = dirname_split
2394
# Grab the dirname for the current dirblock
2395
cur = dirblocks[mid][0]
2397
cur_split = cache[cur]
2399
cur_split = cur.split('/')
2400
cache[cur] = cur_split
2401
if cur_split < dirname_split: lo = mid+1