~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: John Arbash Meinel
  • Date: 2007-07-11 00:01:54 UTC
  • mto: This revision was merged to the branch mainline in revision 2643.
  • Revision ID: john@arbash-meinel.com-20070711000154-4et8yf8si3jgxmgc
(broken) Try to properly implement DirState._bisect*
Involves rewriting some helper functions.
Currently something is wrong.

Show diffs side-by-side

added added

removed removed

Lines of Context:
199
199
 
200
200
"""
201
201
 
202
 
 
203
202
import bisect
204
203
import binascii
205
204
import errno
423
422
        if self._id_index:
424
423
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)
425
424
 
426
 
    def _bisect(self, dir_name_list):
 
425
    def _bisect(self, paths):
427
426
        """Bisect through the disk structure for specific rows.
428
427
 
429
 
        :param dir_name_list: A list of (dir, name) pairs.
430
 
        :return: A dict mapping (dir, name) => entry for found entries. Missing
 
428
        :param paths: A list of paths to find
 
429
        :return: A dict mapping path => entries for found entries. Missing
431
430
                 entries will not be in the map.
 
431
                 The list is not sorted, and entries will be populated
 
432
                 based on when they were read.
432
433
        """
433
434
        self._requires_lock()
434
435
        # We need the file pointer to be right after the initial header block
454
455
        found = {}
455
456
 
456
457
        # Avoid infinite seeking
457
 
        max_count = 30*len(dir_name_list)
 
458
        max_count = 30*len(paths)
458
459
        count = 0
459
460
        # pending is a list of places to look.
460
461
        # each entry is a tuple of low, high, dir_names
462
463
        #   high -> the last byte offset (inclusive)
463
464
        #   dir_names -> The list of (dir, name) pairs that should be found in
464
465
        #                the [low, high] range
465
 
        pending = [(low, high, dir_name_list)]
 
466
        pending = [(low, high, paths)]
466
467
 
467
468
        page_size = self._bisect_page_size
468
469
 
521
522
                # Find what entries we are looking for, which occur before and
522
523
                # after this first record.
523
524
                after = start
524
 
                first_dir_name = (first_fields[1], first_fields[2])
525
 
                first_loc = bisect.bisect_left(cur_files, first_dir_name)
 
525
                if first_fields[1]:
 
526
                    first_path = first_fields[1] + '/' + first_fields[2]
 
527
                else:
 
528
                    first_path = first_fields[2]
 
529
                first_loc = bisect_path_left(cur_files, first_path)
526
530
 
527
531
                # These exist before the current location
528
532
                pre = cur_files[:first_loc]
545
549
                else:
546
550
                    after = mid + len(block)
547
551
 
548
 
                last_dir_name = (last_fields[1], last_fields[2])
549
 
                last_loc = bisect.bisect_right(post, last_dir_name)
 
552
                if last_fields[1]:
 
553
                    last_path = last_fields[1] + '/' + last_fields[2]
 
554
                else:
 
555
                    last_path = last_fields[2]
 
556
                last_loc = bisect_path_right(post, last_path)
550
557
 
551
558
                middle_files = post[:last_loc]
552
559
                post = post[last_loc:]
557
564
                    # Either we will find them here, or we can mark them as
558
565
                    # missing.
559
566
 
560
 
                    if middle_files[0] == first_dir_name:
 
567
                    if middle_files[0] == first_path:
561
568
                        # We might need to go before this location
562
 
                        pre.append(first_dir_name)
563
 
                    if middle_files[-1] == last_dir_name:
564
 
                        post.insert(0, last_dir_name)
 
569
                        pre.append(first_path)
 
570
                    if middle_files[-1] == last_path:
 
571
                        post.insert(0, last_path)
565
572
 
566
573
                    # Find out what paths we have
567
 
                    paths = {first_dir_name:[first_fields]}
568
 
                    # last_dir_name might == first_dir_name so we need to be
 
574
                    paths = {first_path:[first_fields]}
 
575
                    # last_path might == first_path so we need to be
569
576
                    # careful if we should append rather than overwrite
570
577
                    if last_entry_num != first_entry_num:
571
 
                        paths.setdefault(last_dir_name, []).append(last_fields)
 
578
                        paths.setdefault(last_path, []).append(last_fields)
572
579
                    for num in xrange(first_entry_num+1, last_entry_num):
573
580
                        # TODO: jam 20070223 We are already splitting here, so
574
581
                        #       shouldn't we just split the whole thing rather
575
582
                        #       than doing the split again in add_one_record?
576
583
                        fields = entries[num].split('\0')
577
 
                        dir_name = (fields[1], fields[2])
578
 
                        paths.setdefault(dir_name, []).append(fields)
 
584
                        if fields[1]:
 
585
                            path = fields[1] + '/' + fields[2]
 
586
                        else:
 
587
                            path = fields[2]
 
588
                        paths.setdefault(path, []).append(fields)
579
589
 
580
 
                    for dir_name in middle_files:
581
 
                        for fields in paths.get(dir_name, []):
 
590
                    for path in middle_files:
 
591
                        for fields in paths.get(path, []):
582
592
                            # offset by 1 because of the opening '\0'
583
593
                            # consider changing fields_to_entry to avoid the
584
594
                            # extra list slice
585
595
                            entry = fields_to_entry(fields[1:])
586
 
                            found.setdefault(dir_name, []).append(entry)
 
596
                            found.setdefault(path, []).append(entry)
587
597
 
588
598
            # Now we have split up everything into pre, middle, and post, and
589
599
            # we have handled everything that fell in 'middle'.
705
715
                # after this first record.
706
716
                after = start
707
717
                first_dir = first_fields[1]
708
 
                first_loc = bisect.bisect_left(cur_dirs, first_dir)
 
718
                first_loc = bisect_path_left(cur_dirs, first_dir)
709
719
 
710
720
                # These exist before the current location
711
721
                pre = cur_dirs[:first_loc]
729
739
                    after = mid + len(block)
730
740
 
731
741
                last_dir = last_fields[1]
732
 
                last_loc = bisect.bisect_right(post, last_dir)
 
742
                last_loc = bisect_path_right(post, last_dir)
733
743
 
734
744
                middle_files = post[:last_loc]
735
745
                post = post[last_loc:]
779
789
 
780
790
        return found
781
791
 
782
 
    def _bisect_recursive(self, dir_name_list):
 
792
    def _bisect_recursive(self, paths):
783
793
        """Bisect for entries for all paths and their children.
784
794
 
785
795
        This will use bisect to find all records for the supplied paths. It
798
808
        # Directories that have been read
799
809
        processed_dirs = set()
800
810
        # Get the ball rolling with the first bisect for all entries.
801
 
        newly_found = self._bisect(dir_name_list)
 
811
        newly_found = self._bisect(paths)
802
812
 
803
813
        while newly_found:
804
814
            # Directories that need to be read
2267
2277
    from bzrlib._dirstate_helpers_c import (
2268
2278
        _read_dirblocks_c as _read_dirblocks,
2269
2279
        bisect_dirblock_c as bisect_dirblock,
 
2280
        bisect_path_left_c as bisect_path_left,
 
2281
        bisect_path_right_c as bisect_path_right,
2270
2282
        cmp_by_dirs_c as cmp_by_dirs,
2271
2283
        )
2272
2284
except ImportError:
2273
2285
    from bzrlib._dirstate_helpers_py import (
2274
2286
        _read_dirblocks_py as _read_dirblocks,
2275
2287
        bisect_dirblock_py as bisect_dirblock,
 
2288
        bisect_path_left_py as bisect_path_left,
 
2289
        bisect_path_right_py as bisect_path_right,
2276
2290
        cmp_by_dirs_py as cmp_by_dirs,
2277
2291
        )