~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2007-06-28 07:08:27 UTC
  • mfrom: (2553.1.3 integration)
  • Revision ID: pqm@pqm.ubuntu.com-20070628070827-h5s313dg5tnag9vj
(robertc) Show the names of commit hooks during commit.

Show diffs side-by-side

added added

removed removed

Lines of Context:
199
199
 
200
200
"""
201
201
 
 
202
 
 
203
import binascii
202
204
import bisect
203
 
import binascii
204
205
import errno
205
206
import os
206
207
from stat import S_IEXEC
219
220
    )
220
221
 
221
222
 
 
223
class _Bisector(object):
 
224
    """This just keeps track of information as we are bisecting."""
 
225
 
 
226
 
222
227
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
223
228
    """Convert stat values into a packed representation."""
224
229
    # jam 20060614 it isn't really worth removing more entries if we
457
462
        if self._id_index:
458
463
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)
459
464
 
460
 
    def _bisect(self, paths):
 
465
    def _bisect(self, dir_name_list):
461
466
        """Bisect through the disk structure for specific rows.
462
467
 
463
 
        :param paths: A list of paths to find
464
 
        :return: A dict mapping path => entries for found entries. Missing
 
468
        :param dir_name_list: A list of (dir, name) pairs.
 
469
        :return: A dict mapping (dir, name) => entry for found entries. Missing
465
470
                 entries will not be in the map.
466
 
                 The list is not sorted, and entries will be populated
467
 
                 based on when they were read.
468
471
        """
469
472
        self._requires_lock()
470
473
        # We need the file pointer to be right after the initial header block
490
493
        found = {}
491
494
 
492
495
        # Avoid infinite seeking
493
 
        max_count = 30*len(paths)
 
496
        max_count = 30*len(dir_name_list)
494
497
        count = 0
495
498
        # pending is a list of places to look.
496
499
        # each entry is a tuple of low, high, dir_names
498
501
        #   high -> the last byte offset (inclusive)
499
502
        #   dir_names -> The list of (dir, name) pairs that should be found in
500
503
        #                the [low, high] range
501
 
        pending = [(low, high, paths)]
 
504
        pending = [(low, high, dir_name_list)]
502
505
 
503
506
        page_size = self._bisect_page_size
504
507
 
557
560
                # Find what entries we are looking for, which occur before and
558
561
                # after this first record.
559
562
                after = start
560
 
                if first_fields[1]:
561
 
                    first_path = first_fields[1] + '/' + first_fields[2]
562
 
                else:
563
 
                    first_path = first_fields[2]
564
 
                first_loc = _bisect_path_left(cur_files, first_path)
 
563
                first_dir_name = (first_fields[1], first_fields[2])
 
564
                first_loc = bisect.bisect_left(cur_files, first_dir_name)
565
565
 
566
566
                # These exist before the current location
567
567
                pre = cur_files[:first_loc]
584
584
                else:
585
585
                    after = mid + len(block)
586
586
 
587
 
                if last_fields[1]:
588
 
                    last_path = last_fields[1] + '/' + last_fields[2]
589
 
                else:
590
 
                    last_path = last_fields[2]
591
 
                last_loc = _bisect_path_right(post, last_path)
 
587
                last_dir_name = (last_fields[1], last_fields[2])
 
588
                last_loc = bisect.bisect_right(post, last_dir_name)
592
589
 
593
590
                middle_files = post[:last_loc]
594
591
                post = post[last_loc:]
599
596
                    # Either we will find them here, or we can mark them as
600
597
                    # missing.
601
598
 
602
 
                    if middle_files[0] == first_path:
 
599
                    if middle_files[0] == first_dir_name:
603
600
                        # We might need to go before this location
604
 
                        pre.append(first_path)
605
 
                    if middle_files[-1] == last_path:
606
 
                        post.insert(0, last_path)
 
601
                        pre.append(first_dir_name)
 
602
                    if middle_files[-1] == last_dir_name:
 
603
                        post.insert(0, last_dir_name)
607
604
 
608
605
                    # Find out what paths we have
609
 
                    paths = {first_path:[first_fields]}
610
 
                    # last_path might == first_path so we need to be
 
606
                    paths = {first_dir_name:[first_fields]}
 
607
                    # last_dir_name might == first_dir_name so we need to be
611
608
                    # careful if we should append rather than overwrite
612
609
                    if last_entry_num != first_entry_num:
613
 
                        paths.setdefault(last_path, []).append(last_fields)
 
610
                        paths.setdefault(last_dir_name, []).append(last_fields)
614
611
                    for num in xrange(first_entry_num+1, last_entry_num):
615
612
                        # TODO: jam 20070223 We are already splitting here, so
616
613
                        #       shouldn't we just split the whole thing rather
617
614
                        #       than doing the split again in add_one_record?
618
615
                        fields = entries[num].split('\0')
619
 
                        if fields[1]:
620
 
                            path = fields[1] + '/' + fields[2]
621
 
                        else:
622
 
                            path = fields[2]
623
 
                        paths.setdefault(path, []).append(fields)
 
616
                        dir_name = (fields[1], fields[2])
 
617
                        paths.setdefault(dir_name, []).append(fields)
624
618
 
625
 
                    for path in middle_files:
626
 
                        for fields in paths.get(path, []):
 
619
                    for dir_name in middle_files:
 
620
                        for fields in paths.get(dir_name, []):
627
621
                            # offset by 1 because of the opening '\0'
628
622
                            # consider changing fields_to_entry to avoid the
629
623
                            # extra list slice
630
624
                            entry = fields_to_entry(fields[1:])
631
 
                            found.setdefault(path, []).append(entry)
 
625
                            found.setdefault(dir_name, []).append(entry)
632
626
 
633
627
            # Now we have split up everything into pre, middle, and post, and
634
628
            # we have handled everything that fell in 'middle'.
824
818
 
825
819
        return found
826
820
 
827
 
    def _bisect_recursive(self, paths):
 
821
    def _bisect_recursive(self, dir_name_list):
828
822
        """Bisect for entries for all paths and their children.
829
823
 
830
824
        This will use bisect to find all records for the supplied paths. It
843
837
        # Directories that have been read
844
838
        processed_dirs = set()
845
839
        # Get the ball rolling with the first bisect for all entries.
846
 
        newly_found = self._bisect(paths)
 
840
        newly_found = self._bisect(dir_name_list)
847
841
 
848
842
        while newly_found:
849
843
            # Directories that need to be read
873
867
                            if dir_name[0] in pending_dirs:
874
868
                                # This entry will be found in the dir search
875
869
                                continue
 
870
                            # TODO: We need to check if this entry has
 
871
                            #       already been found. Otherwise we might be
 
872
                            #       hitting infinite recursion.
876
873
                            if dir_name not in found_dir_names:
877
 
                                paths_to_search.add(tree_info[1])
 
874
                                paths_to_search.add(dir_name)
878
875
            # Now we have a list of paths to look for directly, and
879
876
            # directory blocks that need to be read.
880
877
            # newly_found is mixing the keys between (dir, name) and path
1553
1550
        """
1554
1551
        self._read_header_if_needed()
1555
1552
        if self._dirblock_state == DirState.NOT_IN_MEMORY:
1556
 
            _read_dirblocks(self)
 
1553
            # move the _state_file pointer to after the header (in case bisect
 
1554
            # has been called in the mean time)
 
1555
            self._state_file.seek(self._end_of_header)
 
1556
            text = self._state_file.read()
 
1557
            # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
 
1558
 
 
1559
            fields = text.split('\0')
 
1560
            # Remove the last blank entry
 
1561
            trailing = fields.pop()
 
1562
            assert trailing == ''
 
1563
            # consider turning fields into a tuple.
 
1564
 
 
1565
            # skip the first field which is the trailing null from the header.
 
1566
            cur = 1
 
1567
            # Each line now has an extra '\n' field which is not used
 
1568
            # so we just skip over it
 
1569
            # entry size:
 
1570
            #  3 fields for the key
 
1571
            #  + number of fields per tree_data (5) * tree count
 
1572
            #  + newline
 
1573
            num_present_parents = self._num_present_parents()
 
1574
            tree_count = 1 + num_present_parents
 
1575
            entry_size = self._fields_per_entry()
 
1576
            expected_field_count = entry_size * self._num_entries
 
1577
            field_count = len(fields)
 
1578
            # this checks our adjustment, and also catches file too short.
 
1579
            assert field_count - cur == expected_field_count, \
 
1580
                'field count incorrect %s != %s, entry_size=%s, '\
 
1581
                'num_entries=%s fields=%r' % (
 
1582
                    field_count - cur, expected_field_count, entry_size,
 
1583
                    self._num_entries, fields)
 
1584
 
 
1585
            if num_present_parents == 1:
 
1586
                # Bind external functions to local names
 
1587
                _int = int
 
1588
                # We access all fields in order, so we can just iterate over
 
1589
                # them. Grab an straight iterator over the fields. (We use an
 
1590
                # iterator because we don't want to do a lot of additions, nor
 
1591
                # do we want to do a lot of slicing)
 
1592
                next = iter(fields).next
 
1593
                # Move the iterator to the current position
 
1594
                for x in xrange(cur):
 
1595
                    next()
 
1596
                # The two blocks here are deliberate: the root block and the
 
1597
                # contents-of-root block.
 
1598
                self._dirblocks = [('', []), ('', [])]
 
1599
                current_block = self._dirblocks[0][1]
 
1600
                current_dirname = ''
 
1601
                append_entry = current_block.append
 
1602
                for count in xrange(self._num_entries):
 
1603
                    dirname = next()
 
1604
                    name = next()
 
1605
                    file_id = next()
 
1606
                    if dirname != current_dirname:
 
1607
                        # new block - different dirname
 
1608
                        current_block = []
 
1609
                        current_dirname = dirname
 
1610
                        self._dirblocks.append((current_dirname, current_block))
 
1611
                        append_entry = current_block.append
 
1612
                    # we know current_dirname == dirname, so re-use it to avoid
 
1613
                    # creating new strings
 
1614
                    entry = ((current_dirname, name, file_id),
 
1615
                             [(# Current Tree
 
1616
                                 next(),                # minikind
 
1617
                                 next(),                # fingerprint
 
1618
                                 _int(next()),          # size
 
1619
                                 next() == 'y',         # executable
 
1620
                                 next(),                # packed_stat or revision_id
 
1621
                             ),
 
1622
                             ( # Parent 1
 
1623
                                 next(),                # minikind
 
1624
                                 next(),                # fingerprint
 
1625
                                 _int(next()),          # size
 
1626
                                 next() == 'y',         # executable
 
1627
                                 next(),                # packed_stat or revision_id
 
1628
                             ),
 
1629
                             ])
 
1630
                    trailing = next()
 
1631
                    assert trailing == '\n'
 
1632
                    # append the entry to the current block
 
1633
                    append_entry(entry)
 
1634
                self._split_root_dirblock_into_contents()
 
1635
            else:
 
1636
                fields_to_entry = self._get_fields_to_entry()
 
1637
                entries = [fields_to_entry(fields[pos:pos+entry_size])
 
1638
                           for pos in xrange(cur, field_count, entry_size)]
 
1639
                self._entries_to_current_state(entries)
 
1640
            # To convert from format 2  => format 3
 
1641
            # self._dirblocks = sorted(self._dirblocks,
 
1642
            #                          key=lambda blk:blk[0].split('/'))
 
1643
            # To convert from format 3 => format 2
 
1644
            # self._dirblocks = sorted(self._dirblocks)
 
1645
            self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
1557
1646
 
1558
1647
    def _read_header(self):
1559
1648
        """This reads in the metadata header, and the parent ids.
2283
2372
            raise errors.ObjectNotLocked(self)
2284
2373
 
2285
2374
 
2286
 
# Try to load the compiled form if possible
2287
 
try:
2288
 
    from bzrlib._dirstate_helpers_c import (
2289
 
        _read_dirblocks_c as _read_dirblocks,
2290
 
        bisect_dirblock_c as bisect_dirblock,
2291
 
        _bisect_path_left_c as _bisect_path_left,
2292
 
        _bisect_path_right_c as _bisect_path_right,
2293
 
        cmp_by_dirs_c as cmp_by_dirs,
2294
 
        )
2295
 
except ImportError:
2296
 
    from bzrlib._dirstate_helpers_py import (
2297
 
        _read_dirblocks_py as _read_dirblocks,
2298
 
        bisect_dirblock_py as bisect_dirblock,
2299
 
        _bisect_path_left_py as _bisect_path_left,
2300
 
        _bisect_path_right_py as _bisect_path_right,
2301
 
        cmp_by_dirs_py as cmp_by_dirs,
2302
 
        )
 
2375
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
    """Find the insertion point for dirname within dirblocks.

    Directory names are compared by their '/'-separated components, so
    each name is split before comparison.  The returned index idx
    satisfies: every block in dirblocks[:idx] has a name < dirname, and
    every block in dirblocks[idx:] has a name >= dirname.

    :param dirblocks: A sorted list of (dirname, entries) tuples.
    :param dirname: The directory name to locate.
    :param lo: Lower bound of the slice to search (default 0).
    :param hi: Upper bound of the slice to search
        (default len(dirblocks)).
    :param cache: Memoizes the split form of each path.  The shared
        mutable default is deliberate: it persists across calls so each
        distinct path is only split once.
    :return: The index at which dirname should be inserted.
    """
    if hi is None:
        hi = len(dirblocks)

    def _split(path):
        # Split on '/' at most once per distinct path, remembering the
        # result in the shared cache.
        try:
            return cache[path]
        except KeyError:
            parts = path.split('/')
            cache[path] = parts
            return parts

    target = _split(dirname)
    while lo < hi:
        mid = (lo + hi) // 2
        if _split(dirblocks[mid][0]) < target:
            lo = mid + 1
        else:
            hi = mid
    return lo