~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/workingtree_4.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2007-05-31 21:08:33 UTC
  • mfrom: (2485.3.14 dirstate_optim_2)
  • Revision ID: pqm@pqm.ubuntu.com-20070531210833-8ptk86ocu822hjd5
(John Arbash Meinel, r=robert) Tweak _iter_changes and update_entry to make bzr status and bzr diff much faster

Show diffs side-by-side

added added

removed removed

Lines of Context:
413
413
 
414
414
        file_abspath = self.abspath(path)
415
415
        state = self.current_dirstate()
 
416
        if stat_value is None:
 
417
            stat_value = os.lstat(file_abspath)
416
418
        link_or_sha1 = state.update_entry(entry, file_abspath,
417
419
                                          stat_value=stat_value)
418
420
        if entry[1][0][0] == 'f':
1666
1668
            output. An unversioned file is defined as one with (False, False)
1667
1669
            for the versioned pair.
1668
1670
        """
1669
 
        utf8_decode_or_none = cache_utf8._utf8_decode_with_None
 
1671
        utf8_decode = cache_utf8._utf8_decode
1670
1672
        _minikind_to_kind = dirstate.DirState._minikind_to_kind
1671
1673
        # NB: show_status depends on being able to pass in non-versioned files
1672
1674
        # and report them as unknown
1800
1802
        NULL_PARENT_DETAILS = dirstate.DirState.NULL_PARENT_DETAILS
1801
1803
        # Using a list so that we can access the values and change them in
1802
1804
        # nested scope. Each one is [path, file_id, entry]
1803
 
        last_source_parent = [None, None, None]
1804
 
        last_target_parent = [None, None, None]
 
1805
        last_source_parent = [None, None]
 
1806
        last_target_parent = [None, None]
1805
1807
 
1806
1808
        use_filesystem_for_exec = (sys.platform != 'win32')
1807
1809
 
 
1810
        # Just a sentinel, so that _process_entry can say that this
 
1811
        # record is handled, but isn't interesting to process (unchanged)
 
1812
        uninteresting = object()
 
1813
 
 
1814
 
 
1815
        old_dirname_to_file_id = {}
 
1816
        new_dirname_to_file_id = {}
 
1817
        # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
 
1818
        #       keeping a cache of directories that we have seen.
 
1819
 
1808
1820
        def _process_entry(entry, path_info):
1809
1821
            """Compare an entry and real disk to generate delta information.
1810
1822
 
1814
1826
                Basename is returned as a utf8 string because we expect this
1815
1827
                tuple will be ignored, and don't want to take the time to
1816
1828
                decode.
 
1829
            :return: None if these don't match
 
1830
                     A tuple of information about the change, or
 
1831
                     the object 'uninteresting' if these match, but are
 
1832
                     basically identical.
1817
1833
            """
1818
1834
            if source_index is None:
1819
1835
                source_details = NULL_PARENT_DETAILS
1830
1846
                target_minikind = target_details[0]
1831
1847
            else:
1832
1848
                link_or_sha1 = None
 
1849
            file_id = entry[0][2]
1833
1850
            source_minikind = source_details[0]
1834
1851
            if source_minikind in 'fdltr' and target_minikind in 'fdlt':
1835
1852
                # claimed content in both: diff
1857
1874
                else:
1858
1875
                    old_dirname = entry[0][0]
1859
1876
                    old_basename = entry[0][1]
1860
 
                    old_path = path = pathjoin(old_dirname, old_basename)
 
1877
                    old_path = path = None
1861
1878
                if path_info is None:
1862
1879
                    # the file is missing on disk, show as removed.
1863
1880
                    content_change = True
1867
1884
                    # source and target are both versioned and disk file is present.
1868
1885
                    target_kind = path_info[2]
1869
1886
                    if target_kind == 'directory':
 
1887
                        if path is None:
 
1888
                            old_path = path = pathjoin(old_dirname, old_basename)
 
1889
                        new_dirname_to_file_id[path] = file_id
1870
1890
                        if source_minikind != 'd':
1871
1891
                            content_change = True
1872
1892
                        else:
1901
1921
                        target_exec = False
1902
1922
                    else:
1903
1923
                        raise Exception, "unknown kind %s" % path_info[2]
 
1924
                if source_minikind == 'd':
 
1925
                    if path is None:
 
1926
                        old_path = path = pathjoin(old_dirname, old_basename)
 
1927
                    old_dirname_to_file_id[old_path] = file_id
1904
1928
                # parent id is the entry for the path in the target tree
1905
1929
                if old_dirname == last_source_parent[0]:
1906
1930
                    source_parent_id = last_source_parent[1]
1907
1931
                else:
1908
 
                    source_parent_entry = state._get_entry(source_index,
1909
 
                                                           path_utf8=old_dirname)
1910
 
                    source_parent_id = source_parent_entry[0][2]
 
1932
                    try:
 
1933
                        source_parent_id = old_dirname_to_file_id[old_dirname]
 
1934
                    except KeyError:
 
1935
                        source_parent_entry = state._get_entry(source_index,
 
1936
                                                               path_utf8=old_dirname)
 
1937
                        source_parent_id = source_parent_entry[0][2]
1911
1938
                    if source_parent_id == entry[0][2]:
1912
1939
                        # This is the root, so the parent is None
1913
1940
                        source_parent_id = None
1914
1941
                    else:
1915
1942
                        last_source_parent[0] = old_dirname
1916
1943
                        last_source_parent[1] = source_parent_id
1917
 
                        last_source_parent[2] = source_parent_entry
1918
1944
                new_dirname = entry[0][0]
1919
1945
                if new_dirname == last_target_parent[0]:
1920
1946
                    target_parent_id = last_target_parent[1]
1921
1947
                else:
1922
 
                    # TODO: We don't always need to do the lookup, because the
1923
 
                    #       parent entry will be the same as the source entry.
1924
 
                    target_parent_entry = state._get_entry(target_index,
1925
 
                                                           path_utf8=new_dirname)
1926
 
                    assert target_parent_entry != (None, None), (
1927
 
                        "Could not find target parent in wt: %s\nparent of: %s"
1928
 
                        % (new_dirname, entry))
1929
 
                    target_parent_id = target_parent_entry[0][2]
 
1948
                    try:
 
1949
                        target_parent_id = new_dirname_to_file_id[new_dirname]
 
1950
                    except KeyError:
 
1951
                        # TODO: We don't always need to do the lookup, because the
 
1952
                        #       parent entry will be the same as the source entry.
 
1953
                        target_parent_entry = state._get_entry(target_index,
 
1954
                                                               path_utf8=new_dirname)
 
1955
                        assert target_parent_entry != (None, None), (
 
1956
                            "Could not find target parent in wt: %s\nparent of: %s"
 
1957
                            % (new_dirname, entry))
 
1958
                        target_parent_id = target_parent_entry[0][2]
1930
1959
                    if target_parent_id == entry[0][2]:
1931
1960
                        # This is the root, so the parent is None
1932
1961
                        target_parent_id = None
1933
1962
                    else:
1934
1963
                        last_target_parent[0] = new_dirname
1935
1964
                        last_target_parent[1] = target_parent_id
1936
 
                        last_target_parent[2] = target_parent_entry
1937
1965
 
1938
1966
                source_exec = source_details[3]
1939
 
                return ((entry[0][2], (old_path, path), content_change,
1940
 
                        (True, True),
1941
 
                        (source_parent_id, target_parent_id),
1942
 
                        (old_basename, entry[0][1]),
1943
 
                        (_minikind_to_kind[source_minikind], target_kind),
1944
 
                        (source_exec, target_exec)),)
 
1967
                if (include_unchanged
 
1968
                    or content_change
 
1969
                    or source_parent_id != target_parent_id
 
1970
                    or old_basename != entry[0][1]
 
1971
                    or source_exec != target_exec
 
1972
                    ):
 
1973
                    if old_path is None:
 
1974
                        old_path = path = pathjoin(old_dirname, old_basename)
 
1975
                        old_path_u = utf8_decode(old_path)[0]
 
1976
                        path_u = old_path_u
 
1977
                    else:
 
1978
                        old_path_u = utf8_decode(old_path)[0]
 
1979
                        if old_path == path:
 
1980
                            path_u = old_path_u
 
1981
                        else:
 
1982
                            path_u = utf8_decode(path)[0]
 
1983
                    source_kind = _minikind_to_kind[source_minikind]
 
1984
                    return (entry[0][2],
 
1985
                           (old_path_u, path_u),
 
1986
                           content_change,
 
1987
                           (True, True),
 
1988
                           (source_parent_id, target_parent_id),
 
1989
                           (utf8_decode(old_basename)[0], utf8_decode(entry[0][1])[0]),
 
1990
                           (source_kind, target_kind),
 
1991
                           (source_exec, target_exec))
 
1992
                else:
 
1993
                    return uninteresting
1945
1994
            elif source_minikind in 'a' and target_minikind in 'fdlt':
1946
1995
                # looks like a new file
1947
1996
                if path_info is not None:
1960
2009
                            and stat.S_IEXEC & path_info[3].st_mode)
1961
2010
                    else:
1962
2011
                        target_exec = target_details[3]
1963
 
                    return ((entry[0][2], (None, path), True,
1964
 
                            (False, True),
1965
 
                            (None, parent_id),
1966
 
                            (None, entry[0][1]),
1967
 
                            (None, path_info[2]),
1968
 
                            (None, target_exec)),)
 
2012
                    return (entry[0][2],
 
2013
                           (None, utf8_decode(path)[0]),
 
2014
                           True,
 
2015
                           (False, True),
 
2016
                           (None, parent_id),
 
2017
                           (None, utf8_decode(entry[0][1])[0]),
 
2018
                           (None, path_info[2]),
 
2019
                           (None, target_exec))
1969
2020
                else:
1970
2021
                    # but it's not on disk: we deliberately treat this as just
1971
2022
                    # never-present. (Why ?! - RBC 20070224)
1980
2031
                parent_id = state._get_entry(source_index, path_utf8=entry[0][0])[0][2]
1981
2032
                if parent_id == entry[0][2]:
1982
2033
                    parent_id = None
1983
 
                return ((entry[0][2], (old_path, None), True,
1984
 
                        (True, False),
1985
 
                        (parent_id, None),
1986
 
                        (entry[0][1], None),
1987
 
                        (_minikind_to_kind[source_minikind], None),
1988
 
                        (source_details[3], None)),)
 
2034
                return (entry[0][2],
 
2035
                       (utf8_decode(old_path)[0], None),
 
2036
                       True,
 
2037
                       (True, False),
 
2038
                       (parent_id, None),
 
2039
                       (utf8_decode(entry[0][1])[0], None),
 
2040
                       (_minikind_to_kind[source_minikind], None),
 
2041
                       (source_details[3], None))
1989
2042
            elif source_minikind in 'fdlt' and target_minikind in 'r':
1990
2043
                # a rename; could be a true rename, or a rename inherited from
1991
2044
                # a renamed parent. TODO: handle this efficiently. It's not
2003
2056
                    "source_minikind=%r, target_minikind=%r"
2004
2057
                    % (source_minikind, target_minikind))
2005
2058
                ## import pdb;pdb.set_trace()
2006
 
            return ()
 
2059
            return None
2007
2060
 
2008
2061
        while search_specific_files:
2009
2062
            # TODO: the pending list should be lexically sorted?  the
2039
2092
                continue
2040
2093
            path_handled = False
2041
2094
            for entry in root_entries:
2042
 
                for result in _process_entry(entry, root_dir_info):
2043
 
                    # this check should probably be outside the loop: one
2044
 
                    # 'iterate two trees' api, and then _iter_changes filters
2045
 
                    # unchanged pairs. - RBC 20070226
 
2095
                result = _process_entry(entry, root_dir_info)
 
2096
                if result is not None:
2046
2097
                    path_handled = True
2047
 
                    if (include_unchanged
2048
 
                        or result[2]                    # content change
2049
 
                        or result[3][0] != result[3][1] # versioned status
2050
 
                        or result[4][0] != result[4][1] # parent id
2051
 
                        or result[5][0] != result[5][1] # name
2052
 
                        or result[6][0] != result[6][1] # kind
2053
 
                        or result[7][0] != result[7][1] # executable
2054
 
                        ):
2055
 
                        yield (result[0],
2056
 
                               (utf8_decode_or_none(result[1][0]),
2057
 
                                utf8_decode_or_none(result[1][1])),
2058
 
                               result[2],
2059
 
                               result[3],
2060
 
                               result[4],
2061
 
                               (utf8_decode_or_none(result[5][0]),
2062
 
                                utf8_decode_or_none(result[5][1])),
2063
 
                               result[6],
2064
 
                               result[7],
2065
 
                              )
 
2098
                    if result is not uninteresting:
 
2099
                        yield result
2066
2100
            if want_unversioned and not path_handled and root_dir_info:
2067
2101
                new_executable = bool(
2068
2102
                    stat.S_ISREG(root_dir_info[3].st_mode)
2145
2179
                                        stat.S_ISREG(current_path_info[3].st_mode)
2146
2180
                                        and stat.S_IEXEC & current_path_info[3].st_mode)
2147
2181
                                    yield (None,
2148
 
                                        (None, utf8_decode_or_none(current_path_info[0])),
 
2182
                                        (None, utf8_decode(current_path_info[0])[0]),
2149
2183
                                        True,
2150
2184
                                        (False, False),
2151
2185
                                        (None, None),
2152
 
                                        (None, utf8_decode_or_none(current_path_info[1])),
 
2186
                                        (None, utf8_decode(current_path_info[1])[0]),
2153
2187
                                        (None, current_path_info[2]),
2154
2188
                                        (None, new_executable))
2155
2189
                                # don't descend into this unversioned path if it is
2176
2210
                        for current_entry in current_block[1]:
2177
2211
                            # entry referring to file not present on disk.
2178
2212
                            # advance the entry only, after processing.
2179
 
                            for result in _process_entry(current_entry, None):
2180
 
                                # this check should probably be outside the loop: one
2181
 
                                # 'iterate two trees' api, and then _iter_changes filters
2182
 
                                # unchanged pairs. - RBC 20070226
2183
 
                                if (include_unchanged
2184
 
                                    or result[2]                    # content change
2185
 
                                    or result[3][0] != result[3][1] # versioned status
2186
 
                                    or result[4][0] != result[4][1] # parent id
2187
 
                                    or result[5][0] != result[5][1] # name
2188
 
                                    or result[6][0] != result[6][1] # kind
2189
 
                                    or result[7][0] != result[7][1] # executable
2190
 
                                    ):
2191
 
                                    yield (result[0],
2192
 
                                           (utf8_decode_or_none(result[1][0]),
2193
 
                                            utf8_decode_or_none(result[1][1])),
2194
 
                                           result[2],
2195
 
                                           result[3],
2196
 
                                           result[4],
2197
 
                                           (utf8_decode_or_none(result[5][0]),
2198
 
                                            utf8_decode_or_none(result[5][1])),
2199
 
                                           result[6],
2200
 
                                           result[7],
2201
 
                                          )
 
2213
                            result = _process_entry(current_entry, None)
 
2214
                            if result is not None:
 
2215
                                if result is not uninteresting:
 
2216
                                    yield result
2202
2217
                        block_index +=1
2203
2218
                        if (block_index < len(state._dirblocks) and
2204
2219
                            osutils.is_inside(current_root,
2233
2248
                        pass
2234
2249
                    elif current_path_info is None:
2235
2250
                        # no path is fine: the per entry code will handle it.
2236
 
                        for result in _process_entry(current_entry, current_path_info):
2237
 
                            # this check should probably be outside the loop: one
2238
 
                            # 'iterate two trees' api, and then _iter_changes filters
2239
 
                            # unchanged pairs. - RBC 20070226
2240
 
                            if (include_unchanged
2241
 
                                or result[2]                    # content change
2242
 
                                or result[3][0] != result[3][1] # versioned status
2243
 
                                or result[4][0] != result[4][1] # parent id
2244
 
                                or result[5][0] != result[5][1] # name
2245
 
                                or result[6][0] != result[6][1] # kind
2246
 
                                or result[7][0] != result[7][1] # executable
2247
 
                                ):
2248
 
                                yield (result[0],
2249
 
                                       (utf8_decode_or_none(result[1][0]),
2250
 
                                        utf8_decode_or_none(result[1][1])),
2251
 
                                       result[2],
2252
 
                                       result[3],
2253
 
                                       result[4],
2254
 
                                       (utf8_decode_or_none(result[5][0]),
2255
 
                                        utf8_decode_or_none(result[5][1])),
2256
 
                                       result[6],
2257
 
                                       result[7],
2258
 
                                      )
 
2251
                        result = _process_entry(current_entry, current_path_info)
 
2252
                        if result is not None:
 
2253
                            if result is not uninteresting:
 
2254
                                yield result
2259
2255
                    elif (current_entry[0][1] != current_path_info[1]
2260
2256
                          or current_entry[1][target_index][0] in 'ar'):
2261
2257
                        # The current path on disk doesn't match the dirblock
2270
2266
                        else:
2271
2267
                            # entry referring to file not present on disk.
2272
2268
                            # advance the entry only, after processing.
2273
 
                            for result in _process_entry(current_entry, None):
2274
 
                                # this check should probably be outside the loop: one
2275
 
                                # 'iterate two trees' api, and then _iter_changes filters
2276
 
                                # unchanged pairs. - RBC 20070226
2277
 
                                if (include_unchanged
2278
 
                                    or result[2]                    # content change
2279
 
                                    or result[3][0] != result[3][1] # versioned status
2280
 
                                    or result[4][0] != result[4][1] # parent id
2281
 
                                    or result[5][0] != result[5][1] # name
2282
 
                                    or result[6][0] != result[6][1] # kind
2283
 
                                    or result[7][0] != result[7][1] # executable
2284
 
                                    ):
2285
 
                                    yield (result[0],
2286
 
                                           (utf8_decode_or_none(result[1][0]),
2287
 
                                            utf8_decode_or_none(result[1][1])),
2288
 
                                           result[2],
2289
 
                                           result[3],
2290
 
                                           result[4],
2291
 
                                           (utf8_decode_or_none(result[5][0]),
2292
 
                                            utf8_decode_or_none(result[5][1])),
2293
 
                                           result[6],
2294
 
                                           result[7],
2295
 
                                          )
 
2269
                            result = _process_entry(current_entry, None)
 
2270
                            if result is not None:
 
2271
                                if result is not uninteresting:
 
2272
                                    yield result
2296
2273
                            advance_path = False
2297
2274
                    else:
2298
 
                        for result in _process_entry(current_entry, current_path_info):
2299
 
                            # this check should probably be outside the loop: one
2300
 
                            # 'iterate two trees' api, and then _iter_changes filters
2301
 
                            # unchanged pairs. - RBC 20070226
 
2275
                        result = _process_entry(current_entry, current_path_info)
 
2276
                        if result is not None:
2302
2277
                            path_handled = True
2303
 
                            if (include_unchanged
2304
 
                                or result[2]                    # content change
2305
 
                                or result[3][0] != result[3][1] # versioned status
2306
 
                                or result[4][0] != result[4][1] # parent id
2307
 
                                or result[5][0] != result[5][1] # name
2308
 
                                or result[6][0] != result[6][1] # kind
2309
 
                                or result[7][0] != result[7][1] # executable
2310
 
                                ):
2311
 
                                yield (result[0],
2312
 
                                       (utf8_decode_or_none(result[1][0]),
2313
 
                                        utf8_decode_or_none(result[1][1])),
2314
 
                                       result[2],
2315
 
                                       result[3],
2316
 
                                       result[4],
2317
 
                                       (utf8_decode_or_none(result[5][0]),
2318
 
                                        utf8_decode_or_none(result[5][1])),
2319
 
                                       result[6],
2320
 
                                       result[7],
2321
 
                                      )
 
2278
                            if result is not uninteresting:
 
2279
                                yield result
2322
2280
                    if advance_entry and current_entry is not None:
2323
2281
                        entry_index += 1
2324
2282
                        if entry_index < len(current_block[1]):
2335
2293
                                    stat.S_ISREG(current_path_info[3].st_mode)
2336
2294
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
2337
2295
                                yield (None,
2338
 
                                    (None, utf8_decode_or_none(current_path_info[0])),
 
2296
                                    (None, utf8_decode(current_path_info[0])[0]),
2339
2297
                                    True,
2340
2298
                                    (False, False),
2341
2299
                                    (None, None),
2342
 
                                    (None, utf8_decode_or_none(current_path_info[1])),
 
2300
                                    (None, utf8_decode(current_path_info[1])[0]),
2343
2301
                                    (None, current_path_info[2]),
2344
2302
                                    (None, new_executable))
2345
2303
                            # don't descend into this unversioned path if it is