~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2011-05-11 02:48:31 UTC
  • mfrom: (5847.1.6 2.4-uncommit-faster)
  • Revision ID: pqm@pqm.ubuntu.com-20110511024831-tm38ubce8znnq351
(jameinel) Make set_parent_trees() ~5% faster by improving the sort key
 function. (John A Meinel)

Show diffs side-by-side

added added

removed removed

Lines of Context:
2141
2141
            executable = False
2142
2142
        else:
2143
2143
            raise Exception("can't pack %s" % inv_entry)
2144
 
        return (minikind, fingerprint, size, executable, tree_data)
 
2144
        return static_tuple.StaticTuple(minikind, fingerprint, size,
 
2145
                                        executable, tree_data)
2145
2146
 
2146
2147
    def _iter_child_entries(self, tree_index, path_utf8):
2147
2148
        """Iterate over all the entries that are children of path_utf.
2525
2526
        parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
2526
2527
        # how many trees do we end up with
2527
2528
        parent_count = len(parent_trees)
 
2529
        st = static_tuple.StaticTuple
2528
2530
 
2529
2531
        # one: the current tree
2530
2532
        for entry in self._iter_entries():
2547
2549
            # the suffix is from tree_index+1:parent_count+1.
2548
2550
            new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
2549
2551
            # now stitch in all the entries from this tree
 
2552
            last_dirname = None
2550
2553
            for path, entry in tree.iter_entries_by_dir():
2551
2554
                # here we process each trees details for each item in the tree.
2552
2555
                # we first update any existing entries for the id at other paths,
2560
2563
                file_id = entry.file_id
2561
2564
                path_utf8 = path.encode('utf8')
2562
2565
                dirname, basename = osutils.split(path_utf8)
2563
 
                new_entry_key = (dirname, basename, file_id)
 
2566
                if dirname == last_dirname:
 
2567
                    # Try to re-use objects as much as possible
 
2568
                    dirname = last_dirname
 
2569
                else:
 
2570
                    last_dirname = dirname
 
2571
                new_entry_key = st(dirname, basename, file_id)
2564
2572
                # tree index consistency: All other paths for this id in this tree
2565
2573
                # index must point to the correct path.
2566
 
                for entry_key in id_index.get(file_id, ()):
 
2574
                entry_keys = id_index.get(file_id, ())
 
2575
                for entry_key in entry_keys:
2567
2576
                    # TODO:PROFILING: It might be faster to just update
2568
2577
                    # rather than checking if we need to, and then overwrite
2569
2578
                    # the one we are located at.
2572
2581
                        # other trees, so put absent pointers there
2573
2582
                        # This is the vertical axis in the matrix, all pointing
2574
2583
                        # to the real path.
2575
 
                        by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2576
 
                # by path consistency: Insert into an existing path record (trivial), or
2577
 
                # add a new one with relocation pointers for the other tree indexes.
2578
 
                entry_keys = id_index.get(file_id, ())
 
2584
                        by_path[entry_key][tree_index] = st('r', path_utf8, 0,
 
2585
                                                            False, '')
 
2586
                # by path consistency: Insert into an existing path record
 
2587
                # (trivial), or add a new one with relocation pointers for the
 
2588
                # other tree indexes.
2579
2589
                if new_entry_key in entry_keys:
2580
 
                    # there is already an entry where this data belongs, just insert it.
 
2590
                    # there is already an entry where this data belongs, just
 
2591
                    # insert it.
2581
2592
                    by_path[new_entry_key][tree_index] = \
2582
2593
                        self._inv_entry_to_details(entry)
2583
2594
                else:
2593
2604
                            new_details.append(DirState.NULL_PARENT_DETAILS)
2594
2605
                        else:
2595
2606
                            # grab any one entry, use it to find the right path.
2596
 
                            # TODO: optimise this to reduce memory use in highly
2597
 
                            # fragmented situations by reusing the relocation
2598
 
                            # records.
2599
2607
                            a_key = iter(entry_keys).next()
2600
2608
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
2601
 
                                # its a pointer or missing statement, use it as is.
 
2609
                                # its a pointer or missing statement, use it as
 
2610
                                # is.
2602
2611
                                new_details.append(by_path[a_key][lookup_index])
2603
2612
                            else:
2604
2613
                                # we have the right key, make a pointer to it.
2605
2614
                                real_path = ('/'.join(a_key[0:2])).strip('/')
2606
 
                                new_details.append(('r', real_path, 0, False, ''))
 
2615
                                new_details.append(st('r', real_path, 0, False,
 
2616
                                                      ''))
2607
2617
                    new_details.append(self._inv_entry_to_details(entry))
2608
2618
                    new_details.extend(new_location_suffix)
2609
2619
                    by_path[new_entry_key] = new_details
2625
2635
        try to keep everything in sorted blocks all the time, but sometimes
2626
2636
        it's easier to sort after the fact.
2627
2637
        """
2628
 
        def _key(entry):
 
2638
        # When sorting, we usually have 10x more entries than directories. (69k
 
2639
        # total entries, 4k directories). So cache the results of splitting.
 
2640
        # Saving time and objects. Also, use StaticTuple to avoid putting all
 
2641
        # of these object into python's garbage collector.
 
2642
        split_dirs = {}
 
2643
        def _key(entry, _split_dirs=split_dirs, _st=static_tuple.StaticTuple):
2629
2644
            # sort by: directory parts, file name, file id
2630
 
            return entry[0][0].split('/'), entry[0][1], entry[0][2]
 
2645
            dirpath, fname, file_id = entry[0]
 
2646
            try:
 
2647
                split = _split_dirs[dirpath]
 
2648
            except KeyError:
 
2649
                split = _st.from_sequence(dirpath.split('/'))
 
2650
                _split_dirs[dirpath] = split
 
2651
            return _st(split, fname, file_id)
2631
2652
        return sorted(entry_list, key=_key)
2632
2653
 
2633
2654
    def set_state_from_inventory(self, new_inv):