~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

Implement _bisect_recursive, which uses multiple bisect calls to
handle renames and finding entries in subdirs.
As is, this could be hooked into paths2ids() if the dirstate has not been loaded yet.
However, it doesn't quite provide enough, since the parsed entries would probably not
be saved. Further, the multiple bisect calls are less efficient then they could be,
because they do not remember the last bisect call.
We should explore switching to a caching structure, which maintains all records that
have been processed, in a structure that can be in-memory searched before going back
to disk.

Show diffs side-by-side

added added

removed removed

Lines of Context:
716
716
 
717
717
        return found
718
718
 
 
719
    def _bisect_recursive(self, dir_name_list):
 
720
        """Bisect for entries for all paths and their children.
 
721
 
 
722
        This will use bisect to find all records for the supplied paths. It
 
723
        will then continue to bisect for any records which are marked as
 
724
        directories. (and renames?)
 
725
 
 
726
        :param paths: A sorted list of (dir, name) pairs
 
727
             eg: [('', 'a'), ('', 'f'), ('a/b', 'c')]
 
728
        :return: A dictionary mapping (dir, name, file_id) => [tree_info]
 
729
        """
 
730
        # Map from (dir, name, file_id) => [tree_info]
 
731
        found = {}
 
732
 
 
733
        found_dir_names = set()
 
734
 
 
735
        # Directories that have been read
 
736
        processed_dirs = set()
 
737
        # Get the ball rolling with the first bisect for all entries.
 
738
        newly_found = self._bisect(dir_name_list)
 
739
 
 
740
        while newly_found:
 
741
            # Directories that need to be read
 
742
            pending_dirs = set()
 
743
            paths_to_search = set()
 
744
            for entry_list in newly_found.itervalues():
 
745
                for dir_name_id, trees_info in entry_list:
 
746
                    found[dir_name_id] = trees_info
 
747
                    found_dir_names.add(dir_name_id[:2])
 
748
                    is_dir = False
 
749
                    for tree_info in trees_info:
 
750
                        minikind = tree_info[0]
 
751
                        if minikind == 'd':
 
752
                            if is_dir:
 
753
                                # We already processed this one as a directory,
 
754
                                # we don't need to do the extra work again.
 
755
                                continue
 
756
                            subdir, name, file_id = dir_name_id
 
757
                            path = osutils.pathjoin(subdir, name)
 
758
                            is_dir = True
 
759
                            if path not in processed_dirs:
 
760
                                pending_dirs.add(path)
 
761
                        elif minikind == 'r':
 
762
                            # Rename, we need to directly search the target
 
763
                            # which is contained in the fingerprint column
 
764
                            dir_name = osutils.split(tree_info[1])
 
765
                            if dir_name[0] in pending_dirs:
 
766
                                # This entry will be found in the dir search
 
767
                                continue
 
768
                            # TODO: We need to check if this entry has
 
769
                            #       already been found. Otherwise we might be
 
770
                            #       hitting infinite recursion.
 
771
                            if dir_name not in found_dir_names:
 
772
                                paths_to_search.add(dir_name)
 
773
            # Now we have a list of paths to look for directly, and
 
774
            # directory blocks that need to be read.
 
775
            # newly_found is mixing the keys between (dir, name) and path
 
776
            # entries, but that is okay, because we only really care about the
 
777
            # targets.
 
778
            newly_found = self._bisect(sorted(paths_to_search))
 
779
            newly_found.update(self._bisect_dirblocks(sorted(pending_dirs)))
 
780
            processed_dirs.update(pending_dirs)
 
781
        return found
 
782
 
719
783
    def _empty_parent_info(self):
720
784
        return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
721
785
                                                    len(self._ghosts))