@@ ... @@
         if self._id_index:
             self._id_index.setdefault(entry_key[2], set()).add(entry_key)

-    def _bisect(self, dir_name_list):
+    def _bisect(self, paths):
         """Bisect through the disk structure for specific rows.

-        :param dir_name_list: A list of (dir, name) pairs.
-        :return: A dict mapping (dir, name) => entry for found entries. Missing
+        :param paths: A list of paths to find
+        :return: A dict mapping path => entries for found entries. Missing
             entries will not be in the map.
             The list is not sorted, and entries will be populated
             based on when they were read.
         """
         self._requires_lock()
         # We need the file pointer to be right after the initial header block
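For review purposes, a quick sketch of the calling convention this hunk changes (the `state` name and the example path are invented, not taken from the diff): the old API keyed results by (dir, name) tuples, the new one by plain relative paths.

# Illustrative only, assuming `state` is a locked DirState instance:
#   old: state._bisect([('doc', 'readme.txt')])  -> {('doc', 'readme.txt'): [entry, ...]}
#   new: state._bisect(['doc/readme.txt'])       -> {'doc/readme.txt': [entry, ...]}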
@@ ... @@
                 # Either we will find them here, or we can mark them as
                 # missing.
-                if middle_files[0] == first_dir_name:
+                if middle_files[0] == first_path:
                     # We might need to go before this location
-                    pre.append(first_dir_name)
-                if middle_files[-1] == last_dir_name:
-                    post.insert(0, last_dir_name)
+                    pre.append(first_path)
+                if middle_files[-1] == last_path:
+                    post.insert(0, last_path)

                 # Find out what paths we have
-                paths = {first_dir_name:[first_fields]}
-                # last_dir_name might == first_dir_name so we need to be
+                paths = {first_path:[first_fields]}
+                # last_path might == first_path so we need to be
                 # careful if we should append rather than overwrite
                 if last_entry_num != first_entry_num:
-                    paths.setdefault(last_dir_name, []).append(last_fields)
+                    paths.setdefault(last_path, []).append(last_fields)
                 for num in xrange(first_entry_num+1, last_entry_num):
                     # TODO: jam 20070223 We are already splitting here, so
                     #       shouldn't we just split the whole thing rather
                     #       than doing the split again in add_one_record?
                     fields = entries[num].split('\0')
-                    dir_name = (fields[1], fields[2])
-                    paths.setdefault(dir_name, []).append(fields)
+                    path = fields[1] + '/' + fields[2]
+                    paths.setdefault(path, []).append(fields)

-                for dir_name in middle_files:
-                    for fields in paths.get(dir_name, []):
+                for path in middle_files:
+                    for fields in paths.get(path, []):
                         # offset by 1 because of the opening '\0'
                         # consider changing fields_to_entry to avoid the
                         # extra list slice
                         entry = fields_to_entry(fields[1:])
-                        found.setdefault(dir_name, []).append(entry)
+                        found.setdefault(path, []).append(entry)

             # Now we have split up everything into pre, middle, and post, and
             # we have handled everything that fell in 'middle'.
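To make the indexing in this hunk easier to follow, here is a sketch of one split record at this point (the example values are invented; the layout simply restates what the surrounding code already assumes about the '\0'-separated rows):

#   fields = entries[num].split('\0')
#   # fields[0] is '' (the row begins with a separator, hence the
#   # "offset by 1" comment and the fields[1:] slice passed to fields_to_entry);
#   # fields[1] is the dirname, fields[2] the basename, fields[3] the file id.
#   path = fields[1] + '/' + fields[2]     # e.g. 'doc' + '/' + 'readme.txt'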
@@ ... @@
                             if dir_name[0] in pending_dirs:
                                 # This entry will be found in the dir search
                                 continue
-                            # TODO: We need to check if this entry has
-                            #       already been found. Otherwise we might be
-                            #       hitting infinite recursion.
                             if dir_name not in found_dir_names:
-                                paths_to_search.add(dir_name)
+                                paths_to_search.add(tree_info[1])
             # Now we have a list of paths to look for directly, and
             # directory blocks that need to be read.
             # newly_found is mixing the keys between (dir, name) and path
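As background for the one-line change above, a sketch of the per-tree record the code is indexing into (the concrete values are invented; the field order matches the entry-parsing code removed further down in this diff):

#   tree_info = (minikind, fingerprint, size, executable, packed_stat)
#   e.g.        ('r', 'other/renamed/path', 0, False, '...')
# For a relocated/renamed record, the fingerprint column carries the target
# path, so paths_to_search.add(tree_info[1]) queues that path directly
# instead of its (dir, name) split.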
@@ ... @@
         self._read_header_if_needed()
         if self._dirblock_state == DirState.NOT_IN_MEMORY:
-            # move the _state_file pointer to after the header (in case bisect
-            # has been called in the mean time)
-            self._state_file.seek(self._end_of_header)
-            text = self._state_file.read()
-            # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
-
-            fields = text.split('\0')
-            # Remove the last blank entry
-            trailing = fields.pop()
-            assert trailing == ''
-            # consider turning fields into a tuple.
-
-            # skip the first field which is the trailing null from the header.
-            cur = 1
-            # Each line now has an extra '\n' field which is not used
-            # so we just skip over it
-            # entry size:
-            #  3 fields for the key
-            #  + number of fields per tree_data (5) * tree count
-            #  + newline
-            num_present_parents = self._num_present_parents()
-            tree_count = 1 + num_present_parents
-            entry_size = self._fields_per_entry()
-            expected_field_count = entry_size * self._num_entries
-            field_count = len(fields)
-            # this checks our adjustment, and also catches file too short.
-            assert field_count - cur == expected_field_count, \
-                'field count incorrect %s != %s, entry_size=%s, '\
-                'num_entries=%s fields=%r' % (
-                    field_count - cur, expected_field_count, entry_size,
-                    self._num_entries, fields)
-
-            if num_present_parents == 1:
-                # Bind external functions to local names
-                _int = int
-                # We access all fields in order, so we can just iterate over
-                # them. Grab an straight iterator over the fields. (We use an
-                # iterator because we don't want to do a lot of additions, nor
-                # do we want to do a lot of slicing)
-                next = iter(fields).next
-                # Move the iterator to the current position
-                for x in xrange(cur):
-                    next()
-                # The two blocks here are deliberate: the root block and the
-                # contents-of-root block.
-                self._dirblocks = [('', []), ('', [])]
-                current_block = self._dirblocks[0][1]
-                current_dirname = ''
-                append_entry = current_block.append
-                for count in xrange(self._num_entries):
-                    dirname = next()
-                    name = next()
-                    file_id = next()
-                    if dirname != current_dirname:
-                        # new block - different dirname
-                        current_block = []
-                        current_dirname = dirname
-                        self._dirblocks.append((current_dirname, current_block))
-                        append_entry = current_block.append
-                    # we know current_dirname == dirname, so re-use it to avoid
-                    # creating new strings
-                    entry = ((current_dirname, name, file_id),
-                             [(# Current tree
-                                 next(),                # minikind
-                                 next(),                # fingerprint
-                                 _int(next()),          # size
-                                 next() == 'y',         # executable
-                                 next(),                # packed_stat or revision_id
-                             ),
-                             (# Parent tree
-                                 next(),                # minikind
-                                 next(),                # fingerprint
-                                 _int(next()),          # size
-                                 next() == 'y',         # executable
-                                 next(),                # packed_stat or revision_id
-                             ),
-                             ])
-                    trailing = next()
-                    assert trailing == '\n'
-                    # append the entry to the current block
-                    append_entry(entry)
-                self._split_root_dirblock_into_contents()
-            else:
-                fields_to_entry = self._get_fields_to_entry()
-                entries = [fields_to_entry(fields[pos:pos+entry_size])
-                           for pos in xrange(cur, field_count, entry_size)]
-                self._entries_to_current_state(entries)
-            # To convert from format 2 => format 3
-            # self._dirblocks = sorted(self._dirblocks,
-            #                          key=lambda blk:blk[0].split('/'))
-            # To convert from format 3 => format 2
-            # self._dirblocks = sorted(self._dirblocks)
-            self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
+            _read_dirblocks(self)

     def _read_header(self):
         """This reads in the metadata header, and the parent ids.
@@ ... @@
             raise errors.ObjectNotLocked(self)


-def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
-    """Return the index where to insert dirname into the dirblocks.
-
-    The return value idx is such that all directories blocks in dirblock[:idx]
-    have names < dirname, and all blocks in dirblock[idx:] have names >=
-    dirname.
-
-    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
-    slice of a to be searched.
-    """
-    if hi is None:
-        hi = len(dirblocks)
-    try:
-        dirname_split = cache[dirname]
-    except KeyError:
-        dirname_split = dirname.split('/')
-        cache[dirname] = dirname_split
-    while lo < hi:
-        mid = (lo+hi)//2
-        # Grab the dirname for the current dirblock
-        cur = dirblocks[mid][0]
-        try:
-            cur_split = cache[cur]
-        except KeyError:
-            cur_split = cur.split('/')
-            cache[cur] = cur_split
-        if cur_split < dirname_split: lo = mid+1
-        else: hi = mid
-    return lo
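For review context, a small illustration of the ordering this helper (now provided by the _dirstate_helpers modules imported below) implements; the dirblock names are invented:

# bisect_dirblock compares dirname.split('/') rather than the raw strings,
# so lookups match dirblock ordering instead of plain lexicographic order:
#   dirblocks = [('', []), ('a', []), ('a/b', []), ('a-a', [])]
#   bisect_dirblock(dirblocks, 'a-a')   # -> 3
# Under plain string comparison 'a-a' would sort before 'a/b' ('-' < '/'),
# which is why the split-based comparison (and the cache of split names) is
# needed to stay consistent with how dirblocks are stored.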
+# Try to load the compiled form if possible
+try:
+    from bzrlib._dirstate_helpers_c import (
+        _read_dirblocks_c as _read_dirblocks,
+        bisect_dirblock_c as bisect_dirblock,
+        _bisect_path_left_c as _bisect_path_left,
+        _bisect_path_right_c as _bisect_path_right,
+        cmp_by_dirs_c as cmp_by_dirs,
+        )
+except ImportError:
+    from bzrlib._dirstate_helpers_py import (
+        _read_dirblocks_py as _read_dirblocks,
+        bisect_dirblock_py as bisect_dirblock,
+        _bisect_path_left_py as _bisect_path_left,
+        _bisect_path_right_py as _bisect_path_right,
+        cmp_by_dirs_py as cmp_by_dirs,
+        )
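A closing note on the design, with a minimal standalone sketch of the same import-fallback pattern (the module and function names below are placeholders, not part of this diff): callers bind to a single name and never need to know whether the compiled or the pure-Python implementation was selected.

# sketch.py -- illustrative only
try:
    from _fast_helpers import helper_c as helper    # hypothetical compiled module
except ImportError:
    from _pure_helpers import helper_py as helper   # hypothetical pure-Python fallback

def do_work(data):
    # Same call site either way; only import time decides the implementation.
    return helper(data)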