~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2007-07-20 19:48:22 UTC
  • mfrom: (2474.1.74 dirstate_pyrex)
  • Revision ID: pqm@pqm.ubuntu.com-20070720194822-smqttk05w6efypf0
(John Arbash Meinel) Implement DirState._read_dirblocks() in pyrex

Show diffs side-by-side

added added

removed removed

Lines of Context:
199
199
 
200
200
"""
201
201
 
202
 
 
 
202
import bisect
203
203
import binascii
204
 
import bisect
205
204
import errno
206
205
import os
207
206
from stat import S_IEXEC
220
219
    )
221
220
 
222
221
 
223
 
class _Bisector(object):
224
 
    """This just keeps track of information as we are bisecting."""
225
 
 
226
 
 
227
222
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
228
223
    """Convert stat values into a packed representation."""
229
224
    # jam 20060614 it isn't really worth removing more entries if we
462
457
        if self._id_index:
463
458
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)
464
459
 
465
 
    def _bisect(self, dir_name_list):
 
460
    def _bisect(self, paths):
466
461
        """Bisect through the disk structure for specific rows.
467
462
 
468
 
        :param dir_name_list: A list of (dir, name) pairs.
469
 
        :return: A dict mapping (dir, name) => entry for found entries. Missing
 
463
        :param paths: A list of paths to find
 
464
        :return: A dict mapping path => entries for found entries. Missing
470
465
                 entries will not be in the map.
 
466
        The returned dict is not sorted; entries will be populated
 
467
                 based on when they were read.
471
468
        """
472
469
        self._requires_lock()
473
470
        # We need the file pointer to be right after the initial header block
493
490
        found = {}
494
491
 
495
492
        # Avoid infinite seeking
496
 
        max_count = 30*len(dir_name_list)
 
493
        max_count = 30*len(paths)
497
494
        count = 0
498
495
        # pending is a list of places to look.
499
496
        # each entry is a tuple of low, high, dir_names
501
498
        #   high -> the last byte offset (inclusive)
502
499
        #   dir_names -> The list of (dir, name) pairs that should be found in
503
500
        #                the [low, high] range
504
 
        pending = [(low, high, dir_name_list)]
 
501
        pending = [(low, high, paths)]
505
502
 
506
503
        page_size = self._bisect_page_size
507
504
 
560
557
                # Find what entries we are looking for, which occur before and
561
558
                # after this first record.
562
559
                after = start
563
 
                first_dir_name = (first_fields[1], first_fields[2])
564
 
                first_loc = bisect.bisect_left(cur_files, first_dir_name)
 
560
                if first_fields[1]:
 
561
                    first_path = first_fields[1] + '/' + first_fields[2]
 
562
                else:
 
563
                    first_path = first_fields[2]
 
564
                first_loc = _bisect_path_left(cur_files, first_path)
565
565
 
566
566
                # These exist before the current location
567
567
                pre = cur_files[:first_loc]
584
584
                else:
585
585
                    after = mid + len(block)
586
586
 
587
 
                last_dir_name = (last_fields[1], last_fields[2])
588
 
                last_loc = bisect.bisect_right(post, last_dir_name)
 
587
                if last_fields[1]:
 
588
                    last_path = last_fields[1] + '/' + last_fields[2]
 
589
                else:
 
590
                    last_path = last_fields[2]
 
591
                last_loc = _bisect_path_right(post, last_path)
589
592
 
590
593
                middle_files = post[:last_loc]
591
594
                post = post[last_loc:]
596
599
                    # Either we will find them here, or we can mark them as
597
600
                    # missing.
598
601
 
599
 
                    if middle_files[0] == first_dir_name:
 
602
                    if middle_files[0] == first_path:
600
603
                        # We might need to go before this location
601
 
                        pre.append(first_dir_name)
602
 
                    if middle_files[-1] == last_dir_name:
603
 
                        post.insert(0, last_dir_name)
 
604
                        pre.append(first_path)
 
605
                    if middle_files[-1] == last_path:
 
606
                        post.insert(0, last_path)
604
607
 
605
608
                    # Find out what paths we have
606
 
                    paths = {first_dir_name:[first_fields]}
607
 
                    # last_dir_name might == first_dir_name so we need to be
 
609
                    paths = {first_path:[first_fields]}
 
610
                    # last_path might == first_path so we need to be
608
611
                    # careful if we should append rather than overwrite
609
612
                    if last_entry_num != first_entry_num:
610
 
                        paths.setdefault(last_dir_name, []).append(last_fields)
 
613
                        paths.setdefault(last_path, []).append(last_fields)
611
614
                    for num in xrange(first_entry_num+1, last_entry_num):
612
615
                        # TODO: jam 20070223 We are already splitting here, so
613
616
                        #       shouldn't we just split the whole thing rather
614
617
                        #       than doing the split again in add_one_record?
615
618
                        fields = entries[num].split('\0')
616
 
                        dir_name = (fields[1], fields[2])
617
 
                        paths.setdefault(dir_name, []).append(fields)
 
619
                        if fields[1]:
 
620
                            path = fields[1] + '/' + fields[2]
 
621
                        else:
 
622
                            path = fields[2]
 
623
                        paths.setdefault(path, []).append(fields)
618
624
 
619
 
                    for dir_name in middle_files:
620
 
                        for fields in paths.get(dir_name, []):
 
625
                    for path in middle_files:
 
626
                        for fields in paths.get(path, []):
621
627
                            # offset by 1 because of the opening '\0'
622
628
                            # consider changing fields_to_entry to avoid the
623
629
                            # extra list slice
624
630
                            entry = fields_to_entry(fields[1:])
625
 
                            found.setdefault(dir_name, []).append(entry)
 
631
                            found.setdefault(path, []).append(entry)
626
632
 
627
633
            # Now we have split up everything into pre, middle, and post, and
628
634
            # we have handled everything that fell in 'middle'.
818
824
 
819
825
        return found
820
826
 
821
 
    def _bisect_recursive(self, dir_name_list):
 
827
    def _bisect_recursive(self, paths):
822
828
        """Bisect for entries for all paths and their children.
823
829
 
824
830
        This will use bisect to find all records for the supplied paths. It
837
843
        # Directories that have been read
838
844
        processed_dirs = set()
839
845
        # Get the ball rolling with the first bisect for all entries.
840
 
        newly_found = self._bisect(dir_name_list)
 
846
        newly_found = self._bisect(paths)
841
847
 
842
848
        while newly_found:
843
849
            # Directories that need to be read
867
873
                            if dir_name[0] in pending_dirs:
868
874
                                # This entry will be found in the dir search
869
875
                                continue
870
 
                            # TODO: We need to check if this entry has
871
 
                            #       already been found. Otherwise we might be
872
 
                            #       hitting infinite recursion.
873
876
                            if dir_name not in found_dir_names:
874
 
                                paths_to_search.add(dir_name)
 
877
                                paths_to_search.add(tree_info[1])
875
878
            # Now we have a list of paths to look for directly, and
876
879
            # directory blocks that need to be read.
877
880
            # newly_found is mixing the keys between (dir, name) and path
1550
1553
        """
1551
1554
        self._read_header_if_needed()
1552
1555
        if self._dirblock_state == DirState.NOT_IN_MEMORY:
1553
 
            # move the _state_file pointer to after the header (in case bisect
1554
 
            # has been called in the mean time)
1555
 
            self._state_file.seek(self._end_of_header)
1556
 
            text = self._state_file.read()
1557
 
            # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
1558
 
 
1559
 
            fields = text.split('\0')
1560
 
            # Remove the last blank entry
1561
 
            trailing = fields.pop()
1562
 
            assert trailing == ''
1563
 
            # consider turning fields into a tuple.
1564
 
 
1565
 
            # skip the first field which is the trailing null from the header.
1566
 
            cur = 1
1567
 
            # Each line now has an extra '\n' field which is not used
1568
 
            # so we just skip over it
1569
 
            # entry size:
1570
 
            #  3 fields for the key
1571
 
            #  + number of fields per tree_data (5) * tree count
1572
 
            #  + newline
1573
 
            num_present_parents = self._num_present_parents()
1574
 
            tree_count = 1 + num_present_parents
1575
 
            entry_size = self._fields_per_entry()
1576
 
            expected_field_count = entry_size * self._num_entries
1577
 
            field_count = len(fields)
1578
 
            # this checks our adjustment, and also catches file too short.
1579
 
            assert field_count - cur == expected_field_count, \
1580
 
                'field count incorrect %s != %s, entry_size=%s, '\
1581
 
                'num_entries=%s fields=%r' % (
1582
 
                    field_count - cur, expected_field_count, entry_size,
1583
 
                    self._num_entries, fields)
1584
 
 
1585
 
            if num_present_parents == 1:
1586
 
                # Bind external functions to local names
1587
 
                _int = int
1588
 
                # We access all fields in order, so we can just iterate over
1589
 
                # them. Grab an straight iterator over the fields. (We use an
1590
 
                # iterator because we don't want to do a lot of additions, nor
1591
 
                # do we want to do a lot of slicing)
1592
 
                next = iter(fields).next
1593
 
                # Move the iterator to the current position
1594
 
                for x in xrange(cur):
1595
 
                    next()
1596
 
                # The two blocks here are deliberate: the root block and the
1597
 
                # contents-of-root block.
1598
 
                self._dirblocks = [('', []), ('', [])]
1599
 
                current_block = self._dirblocks[0][1]
1600
 
                current_dirname = ''
1601
 
                append_entry = current_block.append
1602
 
                for count in xrange(self._num_entries):
1603
 
                    dirname = next()
1604
 
                    name = next()
1605
 
                    file_id = next()
1606
 
                    if dirname != current_dirname:
1607
 
                        # new block - different dirname
1608
 
                        current_block = []
1609
 
                        current_dirname = dirname
1610
 
                        self._dirblocks.append((current_dirname, current_block))
1611
 
                        append_entry = current_block.append
1612
 
                    # we know current_dirname == dirname, so re-use it to avoid
1613
 
                    # creating new strings
1614
 
                    entry = ((current_dirname, name, file_id),
1615
 
                             [(# Current Tree
1616
 
                                 next(),                # minikind
1617
 
                                 next(),                # fingerprint
1618
 
                                 _int(next()),          # size
1619
 
                                 next() == 'y',         # executable
1620
 
                                 next(),                # packed_stat or revision_id
1621
 
                             ),
1622
 
                             ( # Parent 1
1623
 
                                 next(),                # minikind
1624
 
                                 next(),                # fingerprint
1625
 
                                 _int(next()),          # size
1626
 
                                 next() == 'y',         # executable
1627
 
                                 next(),                # packed_stat or revision_id
1628
 
                             ),
1629
 
                             ])
1630
 
                    trailing = next()
1631
 
                    assert trailing == '\n'
1632
 
                    # append the entry to the current block
1633
 
                    append_entry(entry)
1634
 
                self._split_root_dirblock_into_contents()
1635
 
            else:
1636
 
                fields_to_entry = self._get_fields_to_entry()
1637
 
                entries = [fields_to_entry(fields[pos:pos+entry_size])
1638
 
                           for pos in xrange(cur, field_count, entry_size)]
1639
 
                self._entries_to_current_state(entries)
1640
 
            # To convert from format 2  => format 3
1641
 
            # self._dirblocks = sorted(self._dirblocks,
1642
 
            #                          key=lambda blk:blk[0].split('/'))
1643
 
            # To convert from format 3 => format 2
1644
 
            # self._dirblocks = sorted(self._dirblocks)
1645
 
            self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
 
1556
            _read_dirblocks(self)
1646
1557
 
1647
1558
    def _read_header(self):
1648
1559
        """This reads in the metadata header, and the parent ids.
2372
2283
            raise errors.ObjectNotLocked(self)
2373
2284
 
2374
2285
 
2375
 
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
    """Return the index where to insert dirname into the dirblocks.

    The return value idx is such that all directory blocks in dirblocks[:idx]
    have names < dirname, and all blocks in dirblocks[idx:] have names >=
    dirname.

    Comparison is done on the '/'-split form of each directory name, so that
    e.g. 'a/b' sorts before 'a-b' (path-component order, not plain string
    order).

    :param dirblocks: A sorted list of (dirname, entries) tuples.
    :param dirname: The directory name to locate.
    :param lo: Lower bound of the slice of dirblocks to search (default 0).
    :param hi: Upper bound of the slice to search (default len(dirblocks)).
    :param cache: Maps dirname -> dirname.split('/').  NOTE: the mutable
        default is deliberate — it acts as a module-lifetime memoization
        cache shared across calls, avoiding re-splitting hot names.
    :return: The insertion index (bisect_left semantics).
    """
    if hi is None:
        hi = len(dirblocks)
    try:
        dirname_split = cache[dirname]
    except KeyError:
        dirname_split = dirname.split('/')
        cache[dirname] = dirname_split
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the dirname for the current dirblock
        cur = dirblocks[mid][0]
        try:
            cur_split = cache[cur]
        except KeyError:
            cur_split = cur.split('/')
            cache[cur] = cur_split
        # Compare split forms so '/' acts as a component separator rather
        # than an ordinary character in a lexicographic string compare.
        if cur_split < dirname_split:
            lo = mid + 1
        else:
            hi = mid
    return lo
 
2286
# Try to load the compiled form if possible
 
2287
try:
 
2288
    from bzrlib._dirstate_helpers_c import (
 
2289
        _read_dirblocks_c as _read_dirblocks,
 
2290
        bisect_dirblock_c as bisect_dirblock,
 
2291
        _bisect_path_left_c as _bisect_path_left,
 
2292
        _bisect_path_right_c as _bisect_path_right,
 
2293
        cmp_by_dirs_c as cmp_by_dirs,
 
2294
        )
 
2295
except ImportError:
 
2296
    from bzrlib._dirstate_helpers_py import (
 
2297
        _read_dirblocks_py as _read_dirblocks,
 
2298
        bisect_dirblock_py as bisect_dirblock,
 
2299
        _bisect_path_left_py as _bisect_path_left,
 
2300
        _bisect_path_right_py as _bisect_path_right,
 
2301
        cmp_by_dirs_py as cmp_by_dirs,
 
2302
        )