~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2010-09-01 08:02:42 UTC
  • mfrom: (5390.3.3 faster-revert-593560)
  • Revision ID: pqm@pqm.ubuntu.com-20100901080242-esg62ody4frwmy66
(spiv) Avoid repeatedly calling self.target.all_file_ids() in
 InterTree.iter_changes. (Andrew Bennetts)

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2006, 2007, 2008 Canonical Ltd
 
1
# Copyright (C) 2006-2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
187
187
the file on disk, and then immediately discard, the overhead of object creation
188
188
becomes a significant cost.
189
189
 
190
 
Figures: Creating a tuple from from 3 elements was profiled at 0.0625
 
190
Figures: Creating a tuple from 3 elements was profiled at 0.0625
191
191
microseconds, whereas creating a object which is subclassed from tuple was
192
192
0.500 microseconds, and creating an object with 3 elements and slots was 3
193
193
microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get
204
204
import bisect
205
205
import binascii
206
206
import errno
 
207
import operator
207
208
import os
208
209
from stat import S_IEXEC
209
210
import stat
219
220
    inventory,
220
221
    lock,
221
222
    osutils,
 
223
    static_tuple,
222
224
    trace,
223
225
    )
224
226
 
267
269
    """An interface for getting sha1s of a file."""
268
270
 
269
271
    def sha1(self, abspath):
270
 
        """Return the sha1 of a file given its absolute path."""
 
272
        """Return the sha1 of a file given its absolute path.
 
273
 
 
274
        :param abspath:  May be a filesystem encoded absolute path
 
275
             or a unicode path.
 
276
        """
271
277
        raise NotImplementedError(self.sha1)
272
278
 
273
279
    def stat_and_sha1(self, abspath):
274
280
        """Return the stat and sha1 of a file given its absolute path.
275
281
        
 
282
        :param abspath:  May be a filesystem encoded absolute path
 
283
             or a unicode path.
 
284
 
276
285
        Note: the stat should be the stat of the physical file
277
286
        while the sha may be the sha of its canonical content.
278
287
        """
540
549
           self._ensure_block(block_index, entry_index, utf8path)
541
550
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
542
551
        if self._id_index:
543
 
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)
 
552
            self._add_to_id_index(self._id_index, entry_key)
544
553
 
545
554
    def _bisect(self, paths):
546
555
        """Bisect through the disk structure for specific rows.
1270
1279
    def update_by_delta(self, delta):
1271
1280
        """Apply an inventory delta to the dirstate for tree 0
1272
1281
 
 
1282
        This is the workhorse for apply_inventory_delta in dirstate based
 
1283
        trees.
 
1284
 
1273
1285
        :param delta: An inventory delta.  See Inventory.apply_delta for
1274
1286
            details.
1275
1287
        """
1276
1288
        self._read_dirblocks_if_needed()
 
1289
        encode = cache_utf8.encode
1277
1290
        insertions = {}
1278
1291
        removals = {}
1279
 
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
 
1292
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1293
        # items or items placed under files/links/tree-references. We get
 
1294
        # references from every item in the delta that is not a deletion and
 
1295
        # is not itself the root.
 
1296
        parents = set()
 
1297
        # Added ids must not be in the dirstate already. This set holds those
 
1298
        # ids.
 
1299
        new_ids = set()
 
1300
        # This loop transforms the delta to single atomic operations that can
 
1301
        # be executed and validated.
 
1302
        for old_path, new_path, file_id, inv_entry in sorted(
 
1303
            inventory._check_delta_unique_old_paths(
 
1304
            inventory._check_delta_unique_new_paths(
 
1305
            inventory._check_delta_ids_match_entry(
 
1306
            inventory._check_delta_ids_are_valid(
 
1307
            inventory._check_delta_new_path_entry_both_or_None(delta))))),
 
1308
            reverse=True):
1280
1309
            if (file_id in insertions) or (file_id in removals):
1281
 
                raise AssertionError("repeated file id in delta %r" % (file_id,))
 
1310
                raise errors.InconsistentDelta(old_path or new_path, file_id,
 
1311
                    "repeated file_id")
1282
1312
            if old_path is not None:
1283
1313
                old_path = old_path.encode('utf-8')
1284
1314
                removals[file_id] = old_path
 
1315
            else:
 
1316
                new_ids.add(file_id)
1285
1317
            if new_path is not None:
 
1318
                if inv_entry is None:
 
1319
                    raise errors.InconsistentDelta(new_path, file_id,
 
1320
                        "new_path with no entry")
1286
1321
                new_path = new_path.encode('utf-8')
1287
 
                dirname, basename = osutils.split(new_path)
1288
 
                key = (dirname, basename, file_id)
 
1322
                dirname_utf8, basename = osutils.split(new_path)
 
1323
                if basename:
 
1324
                    parents.add((dirname_utf8, inv_entry.parent_id))
 
1325
                key = (dirname_utf8, basename, file_id)
1289
1326
                minikind = DirState._kind_to_minikind[inv_entry.kind]
1290
1327
                if minikind == 't':
1291
 
                    fingerprint = inv_entry.reference_revision
 
1328
                    fingerprint = inv_entry.reference_revision or ''
1292
1329
                else:
1293
1330
                    fingerprint = ''
1294
1331
                insertions[file_id] = (key, minikind, inv_entry.executable,
1303
1340
                    minikind = child[1][0][0]
1304
1341
                    fingerprint = child[1][0][4]
1305
1342
                    executable = child[1][0][3]
1306
 
                    old_child_path = osutils.pathjoin(child[0][0],
1307
 
                                                      child[0][1])
 
1343
                    old_child_path = osutils.pathjoin(child_dirname,
 
1344
                                                      child_basename)
1308
1345
                    removals[child[0][2]] = old_child_path
1309
1346
                    child_suffix = child_dirname[len(old_path):]
1310
1347
                    new_child_dirname = (new_path + child_suffix)
1311
1348
                    key = (new_child_dirname, child_basename, child[0][2])
1312
 
                    new_child_path = os.path.join(new_child_dirname,
1313
 
                                                  child_basename)
 
1349
                    new_child_path = osutils.pathjoin(new_child_dirname,
 
1350
                                                      child_basename)
1314
1351
                    insertions[child[0][2]] = (key, minikind, executable,
1315
1352
                                               fingerprint, new_child_path)
1316
 
        self._apply_removals(removals.values())
1317
 
        self._apply_insertions(insertions.values())
 
1353
        self._check_delta_ids_absent(new_ids, delta, 0)
 
1354
        try:
 
1355
            self._apply_removals(removals.iteritems())
 
1356
            self._apply_insertions(insertions.values())
 
1357
            # Validate parents
 
1358
            self._after_delta_check_parents(parents, 0)
 
1359
        except errors.BzrError, e:
 
1360
            self._changes_aborted = True
 
1361
            if 'integrity error' not in str(e):
 
1362
                raise
 
1363
            # _get_entry raises BzrError when a request is inconsistent; we
 
1364
            # want such errors to be shown as InconsistentDelta - and that 
 
1365
            # fits the behaviour we trigger.
 
1366
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1318
1367
 
1319
1368
    def _apply_removals(self, removals):
1320
 
        for path in sorted(removals, reverse=True):
 
1369
        for file_id, path in sorted(removals, reverse=True,
 
1370
            key=operator.itemgetter(1)):
1321
1371
            dirname, basename = osutils.split(path)
1322
1372
            block_i, entry_i, d_present, f_present = \
1323
1373
                self._get_block_entry_index(dirname, basename, 0)
1324
 
            entry = self._dirblocks[block_i][1][entry_i]
 
1374
            try:
 
1375
                entry = self._dirblocks[block_i][1][entry_i]
 
1376
            except IndexError:
 
1377
                self._changes_aborted = True
 
1378
                raise errors.InconsistentDelta(path, file_id,
 
1379
                    "Wrong path for old path.")
 
1380
            if not f_present or entry[1][0][0] in 'ar':
 
1381
                self._changes_aborted = True
 
1382
                raise errors.InconsistentDelta(path, file_id,
 
1383
                    "Wrong path for old path.")
 
1384
            if file_id != entry[0][2]:
 
1385
                self._changes_aborted = True
 
1386
                raise errors.InconsistentDelta(path, file_id,
 
1387
                    "Attempt to remove path has wrong id - found %r."
 
1388
                    % entry[0][2])
1325
1389
            self._make_absent(entry)
1326
1390
            # See if we have a malformed delta: deleting a directory must not
1327
1391
            # leave crud behind. This increases the number of bisects needed
1335
1399
                # be due to it being in a parent tree, or a corrupt delta.
1336
1400
                for child_entry in self._dirblocks[block_i][1]:
1337
1401
                    if child_entry[1][0][0] not in ('r', 'a'):
 
1402
                        self._changes_aborted = True
1338
1403
                        raise errors.InconsistentDelta(path, entry[0][2],
1339
1404
                            "The file id was deleted but its children were "
1340
1405
                            "not deleted.")
1341
1406
 
1342
1407
    def _apply_insertions(self, adds):
1343
 
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1344
 
            self.update_minimal(key, minikind, executable, fingerprint,
1345
 
                                path_utf8=path_utf8)
 
1408
        try:
 
1409
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
 
1410
                self.update_minimal(key, minikind, executable, fingerprint,
 
1411
                                    path_utf8=path_utf8)
 
1412
        except errors.NotVersionedError:
 
1413
            self._changes_aborted = True
 
1414
            raise errors.InconsistentDelta(path_utf8.decode('utf8'), key[2],
 
1415
                "Missing parent")
1346
1416
 
1347
1417
    def update_basis_by_delta(self, delta, new_revid):
1348
1418
        """Update the parents of this tree after a commit.
1392
1462
        # At the same time, to reduce interface friction we convert the input
1393
1463
        # inventory entries to dirstate.
1394
1464
        root_only = ('', '')
 
1465
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1466
        # items or items placed under files/links/tree-references. We get
 
1467
        # references from every item in the delta that is not a deletion and
 
1468
        # is not itself the root.
 
1469
        parents = set()
 
1470
        # Added ids must not be in the dirstate already. This set holds those
 
1471
        # ids.
 
1472
        new_ids = set()
1395
1473
        for old_path, new_path, file_id, inv_entry in delta:
 
1474
            if inv_entry is not None and file_id != inv_entry.file_id:
 
1475
                raise errors.InconsistentDelta(new_path, file_id,
 
1476
                    "mismatched entry file_id %r" % inv_entry)
 
1477
            if new_path is not None:
 
1478
                if inv_entry is None:
 
1479
                    raise errors.InconsistentDelta(new_path, file_id,
 
1480
                        "new_path with no entry")
 
1481
                new_path_utf8 = encode(new_path)
 
1482
                # note the parent for validation
 
1483
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
 
1484
                if basename_utf8:
 
1485
                    parents.add((dirname_utf8, inv_entry.parent_id))
1396
1486
            if old_path is None:
1397
1487
                adds.append((None, encode(new_path), file_id,
1398
1488
                    inv_to_entry(inv_entry), True))
 
1489
                new_ids.add(file_id)
1399
1490
            elif new_path is None:
1400
1491
                deletes.append((encode(old_path), None, file_id, None, True))
1401
1492
            elif (old_path, new_path) != root_only:
1413
1504
                # for 'r' items on every pass.
1414
1505
                self._update_basis_apply_deletes(deletes)
1415
1506
                deletes = []
1416
 
                new_path_utf8 = encode(new_path)
1417
1507
                # Split into an add/delete pair recursively.
1418
1508
                adds.append((None, new_path_utf8, file_id,
1419
1509
                    inv_to_entry(inv_entry), False))
1445
1535
                # of everything.
1446
1536
                changes.append((encode(old_path), encode(new_path), file_id,
1447
1537
                    inv_to_entry(inv_entry)))
1448
 
 
1449
 
        # Finish expunging deletes/first half of renames.
1450
 
        self._update_basis_apply_deletes(deletes)
1451
 
        # Reinstate second half of renames and new paths.
1452
 
        self._update_basis_apply_adds(adds)
1453
 
        # Apply in-situ changes.
1454
 
        self._update_basis_apply_changes(changes)
 
1538
        self._check_delta_ids_absent(new_ids, delta, 1)
 
1539
        try:
 
1540
            # Finish expunging deletes/first half of renames.
 
1541
            self._update_basis_apply_deletes(deletes)
 
1542
            # Reinstate second half of renames and new paths.
 
1543
            self._update_basis_apply_adds(adds)
 
1544
            # Apply in-situ changes.
 
1545
            self._update_basis_apply_changes(changes)
 
1546
            # Validate parents
 
1547
            self._after_delta_check_parents(parents, 1)
 
1548
        except errors.BzrError, e:
 
1549
            self._changes_aborted = True
 
1550
            if 'integrity error' not in str(e):
 
1551
                raise
 
1552
            # _get_entry raises BzrError when a request is inconsistent; we
 
1553
            # want such errors to be shown as InconsistentDelta - and that 
 
1554
            # fits the behaviour we trigger. Partof this is driven by dirstate
 
1555
            # only supporting deltas that turn the basis into a closer fit to
 
1556
            # the active tree.
 
1557
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1455
1558
 
1456
1559
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1457
1560
        self._header_state = DirState.IN_MEMORY_MODIFIED
1458
1561
        self._id_index = None
1459
1562
        return
1460
1563
 
 
1564
    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
 
1565
        """Check that none of the file_ids in new_ids are present in a tree."""
 
1566
        if not new_ids:
 
1567
            return
 
1568
        id_index = self._get_id_index()
 
1569
        for file_id in new_ids:
 
1570
            for key in id_index.get(file_id, ()):
 
1571
                block_i, entry_i, d_present, f_present = \
 
1572
                    self._get_block_entry_index(key[0], key[1], tree_index)
 
1573
                if not f_present:
 
1574
                    # In a different tree
 
1575
                    continue
 
1576
                entry = self._dirblocks[block_i][1][entry_i]
 
1577
                if entry[0][2] != file_id:
 
1578
                    # Different file_id, so not what we want.
 
1579
                    continue
 
1580
                # NB: No changes made before this helper is called, so no need
 
1581
                # to set the _changes_aborted flag.
 
1582
                raise errors.InconsistentDelta(
 
1583
                    ("%s/%s" % key[0:2]).decode('utf8'), file_id,
 
1584
                    "This file_id is new in the delta but already present in "
 
1585
                    "the target")
 
1586
 
1461
1587
    def _update_basis_apply_adds(self, adds):
1462
1588
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.
1463
1589
 
1528
1654
        null = DirState.NULL_PARENT_DETAILS
1529
1655
        for old_path, new_path, file_id, _, real_delete in deletes:
1530
1656
            if real_delete != (new_path is None):
 
1657
                self._changes_aborted = True
1531
1658
                raise AssertionError("bad delete delta")
1532
1659
            # the entry for this file_id must be in tree 1.
1533
1660
            dirname, basename = osutils.split(old_path)
1566
1693
                    # it is being resurrected here, so blank it out temporarily.
1567
1694
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1568
1695
 
 
1696
    def _after_delta_check_parents(self, parents, index):
 
1697
        """Check that parents required by the delta are all intact.
 
1698
        
 
1699
        :param parents: An iterable of (path_utf8, file_id) tuples which are
 
1700
            required to be present in tree 'index' at path_utf8 with id file_id
 
1701
            and be a directory.
 
1702
        :param index: The column in the dirstate to check for parents in.
 
1703
        """
 
1704
        for dirname_utf8, file_id in parents:
 
1705
            # Get the entry - the ensures that file_id, dirname_utf8 exists and
 
1706
            # has the right file id.
 
1707
            entry = self._get_entry(index, file_id, dirname_utf8)
 
1708
            if entry[1] is None:
 
1709
                self._changes_aborted = True
 
1710
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1711
                    file_id, "This parent is not present.")
 
1712
            # Parents of things must be directories
 
1713
            if entry[1][index][0] != 'd':
 
1714
                self._changes_aborted = True
 
1715
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1716
                    file_id, "This parent is not a directory.")
 
1717
 
1569
1718
    def _observed_sha1(self, entry, sha1, stat_value,
1570
1719
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1571
1720
        """Note the sha1 of a file.
1814
1963
        self._read_dirblocks_if_needed()
1815
1964
        if path_utf8 is not None:
1816
1965
            if type(path_utf8) is not str:
1817
 
                raise AssertionError('path_utf8 is not a str: %s %s'
 
1966
                raise errors.BzrError('path_utf8 is not a str: %s %r'
1818
1967
                    % (type(path_utf8), path_utf8))
1819
1968
            # path lookups are faster
1820
1969
            dirname, basename = osutils.split(path_utf8)
1832
1981
                                          ' tree_index, file_id and path')
1833
1982
            return entry
1834
1983
        else:
1835
 
            possible_keys = self._get_id_index().get(fileid_utf8, None)
 
1984
            possible_keys = self._get_id_index().get(fileid_utf8, ())
1836
1985
            if not possible_keys:
1837
1986
                return None, None
1838
1987
            for key in possible_keys:
1849
1998
                entry_index, present = self._find_entry_index(key, block)
1850
1999
                if present:
1851
2000
                    entry = self._dirblocks[block_index][1][entry_index]
 
2001
                    # TODO: We might want to assert that entry[0][2] ==
 
2002
                    #       fileid_utf8.
1852
2003
                    if entry[1][tree_index][0] in 'fdlt':
1853
2004
                        # this is the result we are looking for: the
1854
2005
                        # real home of this file_id in this tree.
1993
2144
                yield entry
1994
2145
 
1995
2146
    def _get_id_index(self):
1996
 
        """Get an id index of self._dirblocks."""
 
2147
        """Get an id index of self._dirblocks.
 
2148
        
 
2149
        This maps from file_id => [(directory, name, file_id)] entries where
 
2150
        that file_id appears in one of the trees.
 
2151
        """
1997
2152
        if self._id_index is None:
1998
2153
            id_index = {}
1999
2154
            for key, tree_details in self._iter_entries():
2000
 
                id_index.setdefault(key[2], set()).add(key)
 
2155
                self._add_to_id_index(id_index, key)
2001
2156
            self._id_index = id_index
2002
2157
        return self._id_index
2003
2158
 
 
2159
    def _add_to_id_index(self, id_index, entry_key):
 
2160
        """Add this entry to the _id_index mapping."""
 
2161
        # This code used to use a set for every entry in the id_index. However,
 
2162
        # it is *rare* to have more than one entry. So a set is a large
 
2163
        # overkill. And even when we do, we won't ever have more than the
 
2164
        # number of parent trees. Which is still a small number (rarely >2). As
 
2165
        # such, we use a simple tuple, and do our own uniqueness checks. While
 
2166
        # the 'in' check is O(N) since N is nicely bounded it shouldn't ever
 
2167
        # cause quadratic failure.
 
2168
        # TODO: This should use StaticTuple
 
2169
        file_id = entry_key[2]
 
2170
        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
 
2171
        if file_id not in id_index:
 
2172
            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
 
2173
        else:
 
2174
            entry_keys = id_index[file_id]
 
2175
            if entry_key not in entry_keys:
 
2176
                id_index[file_id] = entry_keys + (entry_key,)
 
2177
 
 
2178
    def _remove_from_id_index(self, id_index, entry_key):
 
2179
        """Remove this entry from the _id_index mapping.
 
2180
 
 
2181
        It is an programming error to call this when the entry_key is not
 
2182
        already present.
 
2183
        """
 
2184
        file_id = entry_key[2]
 
2185
        entry_keys = list(id_index[file_id])
 
2186
        entry_keys.remove(entry_key)
 
2187
        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
 
2188
 
2004
2189
    def _get_output_lines(self, lines):
2005
2190
        """Format lines for final output.
2006
2191
 
2028
2213
 
2029
2214
    @staticmethod
2030
2215
    def on_file(path, sha1_provider=None):
2031
 
        """Construct a DirState on the file at path path.
 
2216
        """Construct a DirState on the file at path "path".
2032
2217
 
2033
2218
        :param path: The path at which the dirstate file on disk should live.
2034
2219
        :param sha1_provider: an object meeting the SHA1Provider interface.
2206
2391
        self.update_minimal(('', '', new_id), 'd',
2207
2392
            path_utf8='', packed_stat=entry[1][0][4])
2208
2393
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2209
 
        if self._id_index is not None:
2210
 
            self._id_index.setdefault(new_id, set()).add(entry[0])
2211
2394
 
2212
2395
    def set_parent_trees(self, trees, ghosts):
2213
2396
        """Set the parent trees for the dirstate.
2266
2449
                continue
2267
2450
            by_path[entry[0]] = [entry[1][0]] + \
2268
2451
                [DirState.NULL_PARENT_DETAILS] * parent_count
2269
 
            id_index[entry[0][2]] = set([entry[0]])
 
2452
            # TODO: Possibly inline this, since we know it isn't present yet
 
2453
            #       id_index[entry[0][2]] = (entry[0],)
 
2454
            self._add_to_id_index(id_index, entry[0])
2270
2455
 
2271
2456
        # now the parent trees:
2272
2457
        for tree_index, tree in enumerate(parent_trees):
2294
2479
                new_entry_key = (dirname, basename, file_id)
2295
2480
                # tree index consistency: All other paths for this id in this tree
2296
2481
                # index must point to the correct path.
2297
 
                for entry_key in id_index.setdefault(file_id, set()):
 
2482
                for entry_key in id_index.get(file_id, ()):
2298
2483
                    # TODO:PROFILING: It might be faster to just update
2299
2484
                    # rather than checking if we need to, and then overwrite
2300
2485
                    # the one we are located at.
2306
2491
                        by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2307
2492
                # by path consistency: Insert into an existing path record (trivial), or
2308
2493
                # add a new one with relocation pointers for the other tree indexes.
2309
 
                if new_entry_key in id_index[file_id]:
 
2494
                entry_keys = id_index.get(file_id, ())
 
2495
                if new_entry_key in entry_keys:
2310
2496
                    # there is already an entry where this data belongs, just insert it.
2311
2497
                    by_path[new_entry_key][tree_index] = \
2312
2498
                        self._inv_entry_to_details(entry)
2317
2503
                    new_details = []
2318
2504
                    for lookup_index in xrange(tree_index):
2319
2505
                        # boundary case: this is the first occurence of file_id
2320
 
                        # so there are no id_indexs, possibly take this out of
 
2506
                        # so there are no id_indexes, possibly take this out of
2321
2507
                        # the loop?
2322
 
                        if not len(id_index[file_id]):
 
2508
                        if not len(entry_keys):
2323
2509
                            new_details.append(DirState.NULL_PARENT_DETAILS)
2324
2510
                        else:
2325
2511
                            # grab any one entry, use it to find the right path.
2326
2512
                            # TODO: optimise this to reduce memory use in highly
2327
2513
                            # fragmented situations by reusing the relocation
2328
2514
                            # records.
2329
 
                            a_key = iter(id_index[file_id]).next()
 
2515
                            a_key = iter(entry_keys).next()
2330
2516
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
2331
2517
                                # its a pointer or missing statement, use it as is.
2332
2518
                                new_details.append(by_path[a_key][lookup_index])
2337
2523
                    new_details.append(self._inv_entry_to_details(entry))
2338
2524
                    new_details.extend(new_location_suffix)
2339
2525
                    by_path[new_entry_key] = new_details
2340
 
                    id_index[file_id].add(new_entry_key)
 
2526
                    self._add_to_id_index(id_index, new_entry_key)
2341
2527
        # --- end generation of full tree mappings
2342
2528
 
2343
2529
        # sort and output all the entries
2372
2558
        if 'evil' in debug.debug_flags:
2373
2559
            trace.mutter_callsite(1,
2374
2560
                "set_state_from_inventory called; please mutate the tree instead")
 
2561
        tracing = 'dirstate' in debug.debug_flags
 
2562
        if tracing:
 
2563
            trace.mutter("set_state_from_inventory trace:")
2375
2564
        self._read_dirblocks_if_needed()
2376
2565
        # sketch:
2377
2566
        # Two iterators: current data and new data, both in dirblock order.
2386
2575
        new_iterator = new_inv.iter_entries_by_dir()
2387
2576
        # we will be modifying the dirstate, so we need a stable iterator. In
2388
2577
        # future we might write one, for now we just clone the state into a
2389
 
        # list - which is a shallow copy.
 
2578
        # list using a copy so that we see every original item and don't have
 
2579
        # to adjust the position when items are inserted or deleted in the
 
2580
        # underlying dirstate.
2390
2581
        old_iterator = iter(list(self._iter_entries()))
2391
2582
        # both must have roots so this is safe:
2392
2583
        current_new = new_iterator.next()
2426
2617
            # we make both end conditions explicit
2427
2618
            if not current_old:
2428
2619
                # old is finished: insert current_new into the state.
 
2620
                if tracing:
 
2621
                    trace.mutter("Appending from new '%s'.",
 
2622
                        new_path_utf8.decode('utf8'))
2429
2623
                self.update_minimal(new_entry_key, current_new_minikind,
2430
2624
                    executable=current_new[1].executable,
2431
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2625
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2626
                    fullscan=True)
2432
2627
                current_new = advance(new_iterator)
2433
2628
            elif not current_new:
2434
2629
                # new is finished
 
2630
                if tracing:
 
2631
                    trace.mutter("Truncating from old '%s/%s'.",
 
2632
                        current_old[0][0].decode('utf8'),
 
2633
                        current_old[0][1].decode('utf8'))
2435
2634
                self._make_absent(current_old)
2436
2635
                current_old = advance(old_iterator)
2437
2636
            elif new_entry_key == current_old[0]:
2444
2643
                # kind has changed.
2445
2644
                if (current_old[1][0][3] != current_new[1].executable or
2446
2645
                    current_old[1][0][0] != current_new_minikind):
 
2646
                    if tracing:
 
2647
                        trace.mutter("Updating in-place change '%s'.",
 
2648
                            new_path_utf8.decode('utf8'))
2447
2649
                    self.update_minimal(current_old[0], current_new_minikind,
2448
2650
                        executable=current_new[1].executable,
2449
 
                        path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2651
                        path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2652
                        fullscan=True)
2450
2653
                # both sides are dealt with, move on
2451
2654
                current_old = advance(old_iterator)
2452
2655
                current_new = advance(new_iterator)
2455
2658
                      and new_entry_key[1:] < current_old[0][1:])):
2456
2659
                # new comes before:
2457
2660
                # add a entry for this and advance new
 
2661
                if tracing:
 
2662
                    trace.mutter("Inserting from new '%s'.",
 
2663
                        new_path_utf8.decode('utf8'))
2458
2664
                self.update_minimal(new_entry_key, current_new_minikind,
2459
2665
                    executable=current_new[1].executable,
2460
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2666
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2667
                    fullscan=True)
2461
2668
                current_new = advance(new_iterator)
2462
2669
            else:
2463
2670
                # we've advanced past the place where the old key would be,
2464
2671
                # without seeing it in the new list.  so it must be gone.
 
2672
                if tracing:
 
2673
                    trace.mutter("Deleting from old '%s/%s'.",
 
2674
                        current_old[0][0].decode('utf8'),
 
2675
                        current_old[0][1].decode('utf8'))
2465
2676
                self._make_absent(current_old)
2466
2677
                current_old = advance(old_iterator)
2467
2678
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2468
2679
        self._id_index = None
2469
2680
        self._packed_stat_index = None
 
2681
        if tracing:
 
2682
            trace.mutter("set_state_from_inventory complete.")
2470
2683
 
2471
2684
    def _make_absent(self, current_old):
2472
2685
        """Mark current_old - an entry - as absent for tree 0.
2498
2711
            block[1].pop(entry_index)
2499
2712
            # if we have an id_index in use, remove this key from it for this id.
2500
2713
            if self._id_index is not None:
2501
 
                self._id_index[current_old[0][2]].remove(current_old[0])
 
2714
                self._remove_from_id_index(self._id_index, current_old[0])
2502
2715
        # update all remaining keys for this id to record it as absent. The
2503
2716
        # existing details may either be the record we are marking as deleted
2504
2717
        # (if there were other trees with the id present at this path), or may
2521
2734
        return last_reference
2522
2735
 
2523
2736
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
2524
 
                       packed_stat=None, size=0, path_utf8=None):
 
2737
        packed_stat=None, size=0, path_utf8=None, fullscan=False):
2525
2738
        """Update an entry to the state in tree 0.
2526
2739
 
2527
2740
        This will either create a new entry at 'key' or update an existing one.
2538
2751
        :param size: Size information for new entry
2539
2752
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
2540
2753
                extra computation.
 
2754
        :param fullscan: If True then a complete scan of the dirstate is being
 
2755
            done and checking for duplicate rows should not be done. This
 
2756
            should only be set by set_state_from_inventory and similar methods.
2541
2757
 
2542
2758
        If packed_stat and fingerprint are not given, they're invalidated in
2543
2759
        the entry.
2552
2768
        new_details = (minikind, fingerprint, size, executable, packed_stat)
2553
2769
        id_index = self._get_id_index()
2554
2770
        if not present:
 
2771
            # New record. Check there isn't a entry at this path already.
 
2772
            if not fullscan:
 
2773
                low_index, _ = self._find_entry_index(key[0:2] + ('',), block)
 
2774
                while low_index < len(block):
 
2775
                    entry = block[low_index]
 
2776
                    if entry[0][0:2] == key[0:2]:
 
2777
                        if entry[1][0][0] not in 'ar':
 
2778
                            # This entry has the same path (but a different id) as
 
2779
                            # the new entry we're adding, and is present in ths
 
2780
                            # tree.
 
2781
                            raise errors.InconsistentDelta(
 
2782
                                ("%s/%s" % key[0:2]).decode('utf8'), key[2],
 
2783
                                "Attempt to add item at path already occupied by "
 
2784
                                "id %r" % entry[0][2])
 
2785
                        low_index += 1
 
2786
                    else:
 
2787
                        break
2555
2788
            # new entry, synthesis cross reference here,
2556
 
            existing_keys = id_index.setdefault(key[2], set())
 
2789
            existing_keys = id_index.get(key[2], ())
2557
2790
            if not existing_keys:
2558
2791
                # not currently in the state, simplest case
2559
2792
                new_entry = key, [new_details] + self._empty_parent_info()
2562
2795
                # grab one of them and use it to generate parent
2563
2796
                # relocation/absent entries.
2564
2797
                new_entry = key, [new_details]
2565
 
                for other_key in existing_keys:
 
2798
                # existing_keys can be changed as we iterate.
 
2799
                for other_key in tuple(existing_keys):
2566
2800
                    # change the record at other to be a pointer to this new
2567
2801
                    # record. The loop looks similar to the change to
2568
2802
                    # relocations when updating an existing record but its not:
2569
2803
                    # the test for existing kinds is different: this can be
2570
2804
                    # factored out to a helper though.
2571
 
                    other_block_index, present = self._find_block_index_from_key(other_key)
2572
 
                    if not present:
2573
 
                        raise AssertionError('could not find block for %s' % (other_key,))
2574
 
                    other_entry_index, present = self._find_entry_index(other_key,
2575
 
                                            self._dirblocks[other_block_index][1])
2576
 
                    if not present:
2577
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2805
                    other_block_index, present = self._find_block_index_from_key(
 
2806
                        other_key)
 
2807
                    if not present:
 
2808
                        raise AssertionError('could not find block for %s' % (
 
2809
                            other_key,))
 
2810
                    other_block = self._dirblocks[other_block_index][1]
 
2811
                    other_entry_index, present = self._find_entry_index(
 
2812
                        other_key, other_block)
 
2813
                    if not present:
 
2814
                        raise AssertionError(
 
2815
                            'update_minimal: could not find other entry for %s'
 
2816
                            % (other_key,))
2578
2817
                    if path_utf8 is None:
2579
2818
                        raise AssertionError('no path')
2580
 
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
2581
 
                        ('r', path_utf8, 0, False, '')
 
2819
                    # Turn this other location into a reference to the new
 
2820
                    # location. This also updates the aliased iterator
 
2821
                    # (current_old in set_state_from_inventory) so that the old
 
2822
                    # entry, if not already examined, is skipped over by that
 
2823
                    # loop.
 
2824
                    other_entry = other_block[other_entry_index]
 
2825
                    other_entry[1][0] = ('r', path_utf8, 0, False, '')
 
2826
                    if self._maybe_remove_row(other_block, other_entry_index,
 
2827
                                              id_index):
 
2828
                        # If the row holding this was removed, we need to
 
2829
                        # recompute where this entry goes
 
2830
                        entry_index, _ = self._find_entry_index(key, block)
2582
2831
 
 
2832
                # This loop:
 
2833
                # adds a tuple to the new details for each column
 
2834
                #  - either by copying an existing relocation pointer inside that column
 
2835
                #  - or by creating a new pointer to the right row inside that column
2583
2836
                num_present_parents = self._num_present_parents()
 
2837
                if num_present_parents:
 
2838
                    # TODO: This re-evaluates the existing_keys set, do we need
 
2839
                    #       to do that ourselves?
 
2840
                    other_key = list(existing_keys)[0]
2584
2841
                for lookup_index in xrange(1, num_present_parents + 1):
2585
2842
                    # grab any one entry, use it to find the right path.
2586
2843
                    # TODO: optimise this to reduce memory use in highly
2593
2850
                    update_entry_index, present = \
2594
2851
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
2595
2852
                    if not present:
2596
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2853
                        raise AssertionError('update_minimal: could not find entry for %s' % (other_key,))
2597
2854
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
2598
2855
                    if update_details[0] in 'ar': # relocated, absent
2599
2856
                        # its a pointer or absent in lookup_index's tree, use
2604
2861
                        pointer_path = osutils.pathjoin(*other_key[0:2])
2605
2862
                        new_entry[1].append(('r', pointer_path, 0, False, ''))
2606
2863
            block.insert(entry_index, new_entry)
2607
 
            existing_keys.add(key)
 
2864
            self._add_to_id_index(id_index, key)
2608
2865
        else:
2609
2866
            # Does the new state matter?
2610
2867
            block[entry_index][1][0] = new_details
2619
2876
            # converted to relocated.
2620
2877
            if path_utf8 is None:
2621
2878
                raise AssertionError('no path')
2622
 
            for entry_key in id_index.setdefault(key[2], set()):
 
2879
            existing_keys = id_index[key[2]]
 
2880
            if key not in existing_keys:
 
2881
                raise AssertionError('We found the entry in the blocks, but'
 
2882
                    ' the key is not in the id_index.'
 
2883
                    ' key: %s, existing_keys: %s' % (key, existing_keys))
 
2884
            for entry_key in id_index[key[2]]:
2623
2885
                # TODO:PROFILING: It might be faster to just update
2624
2886
                # rather than checking if we need to, and then overwrite
2625
2887
                # the one we are located at.
2645
2907
 
2646
2908
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2647
2909
 
 
2910
    def _maybe_remove_row(self, block, index, id_index):
 
2911
        """Remove index if it is absent or relocated across the row.
 
2912
        
 
2913
        id_index is updated accordingly.
 
2914
        :return: True if we removed the row, False otherwise
 
2915
        """
 
2916
        present_in_row = False
 
2917
        entry = block[index]
 
2918
        for column in entry[1]:
 
2919
            if column[0] not in 'ar':
 
2920
                present_in_row = True
 
2921
                break
 
2922
        if not present_in_row:
 
2923
            block.pop(index)
 
2924
            self._remove_from_id_index(id_index, entry[0])
 
2925
            return True
 
2926
        return False
 
2927
 
2648
2928
    def _validate(self):
2649
2929
        """Check that invariants on the dirblock are correct.
2650
2930
 
2784
3064
            if absent_positions == tree_count:
2785
3065
                raise AssertionError(
2786
3066
                    "entry %r has no data for any tree." % (entry,))
 
3067
        if self._id_index is not None:
 
3068
            for file_id, entry_keys in self._id_index.iteritems():
 
3069
                for entry_key in entry_keys:
 
3070
                    if entry_key[2] != file_id:
 
3071
                        raise AssertionError(
 
3072
                            'file_id %r did not match entry key %s'
 
3073
                            % (file_id, entry_key))
 
3074
                if len(entry_keys) != len(set(entry_keys)):
 
3075
                    raise AssertionError(
 
3076
                        'id_index contained non-unique data for %s'
 
3077
                        % (entry_keys,))
2787
3078
 
2788
3079
    def _wipe_state(self):
2789
3080
        """Forget all state information about the dirstate."""
2931
3222
                           False, DirState.NULLSTAT)
2932
3223
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
2933
3224
    return link_or_sha1
2934
 
update_entry = py_update_entry
2935
3225
 
2936
3226
 
2937
3227
class ProcessEntryPython(object):
2938
3228
 
2939
 
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
 
3229
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
2940
3230
        "last_source_parent", "last_target_parent", "include_unchanged",
2941
 
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
2942
 
        "search_specific_files", "state", "source_index", "target_index",
2943
 
        "want_unversioned", "tree"]
 
3231
        "partial", "use_filesystem_for_exec", "utf8_decode",
 
3232
        "searched_specific_files", "search_specific_files",
 
3233
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
 
3234
        "state", "source_index", "target_index", "want_unversioned", "tree"]
2944
3235
 
2945
3236
    def __init__(self, include_unchanged, use_filesystem_for_exec,
2946
3237
        search_specific_files, state, source_index, target_index,
2947
3238
        want_unversioned, tree):
2948
3239
        self.old_dirname_to_file_id = {}
2949
3240
        self.new_dirname_to_file_id = {}
2950
 
        # Just a sentry, so that _process_entry can say that this
2951
 
        # record is handled, but isn't interesting to process (unchanged)
2952
 
        self.uninteresting = object()
 
3241
        # Are we doing a partial iter_changes?
 
3242
        self.partial = search_specific_files != set([''])
2953
3243
        # Using a list so that we can access the values and change them in
2954
3244
        # nested scope. Each one is [path, file_id, entry]
2955
3245
        self.last_source_parent = [None, None]
2958
3248
        self.use_filesystem_for_exec = use_filesystem_for_exec
2959
3249
        self.utf8_decode = cache_utf8._utf8_decode
2960
3250
        # for all search_indexs in each path at or under each element of
2961
 
        # search_specific_files, if the detail is relocated: add the id, and add the
2962
 
        # relocated path as one to search if its not searched already. If the
2963
 
        # detail is not relocated, add the id.
 
3251
        # search_specific_files, if the detail is relocated: add the id, and
 
3252
        # add the relocated path as one to search if its not searched already.
 
3253
        # If the detail is not relocated, add the id.
2964
3254
        self.searched_specific_files = set()
 
3255
        # When we search exact paths without expanding downwards, we record
 
3256
        # that here.
 
3257
        self.searched_exact_paths = set()
2965
3258
        self.search_specific_files = search_specific_files
 
3259
        # The parents up to the root of the paths we are searching.
 
3260
        # After all normal paths are returned, these specific items are returned.
 
3261
        self.search_specific_file_parents = set()
 
3262
        # The ids we've sent out in the delta.
 
3263
        self.seen_ids = set()
2966
3264
        self.state = state
2967
3265
        self.source_index = source_index
2968
3266
        self.target_index = target_index
 
3267
        if target_index != 0:
 
3268
            # A lot of code in here depends on target_index == 0
 
3269
            raise errors.BzrError('unsupported target index')
2969
3270
        self.want_unversioned = want_unversioned
2970
3271
        self.tree = tree
2971
3272
 
2973
3274
        """Compare an entry and real disk to generate delta information.
2974
3275
 
2975
3276
        :param path_info: top_relpath, basename, kind, lstat, abspath for
2976
 
            the path of entry. If None, then the path is considered absent.
2977
 
            (Perhaps we should pass in a concrete entry for this ?)
 
3277
            the path of entry. If None, then the path is considered absent in 
 
3278
            the target (Perhaps we should pass in a concrete entry for this ?)
2978
3279
            Basename is returned as a utf8 string because we expect this
2979
3280
            tuple will be ignored, and don't want to take the time to
2980
3281
            decode.
2981
 
        :return: None if these don't match
2982
 
                 A tuple of information about the change, or
2983
 
                 the object 'uninteresting' if these match, but are
2984
 
                 basically identical.
 
3282
        :return: (iter_changes_result, changed). If the entry has not been
 
3283
            handled then changed is None. Otherwise it is False if no content
 
3284
            or metadata changes have occurred, and True if any content or
 
3285
            metadata change has occurred. If self.include_unchanged is True then
 
3286
            if changed is not None, iter_changes_result will always be a result
 
3287
            tuple. Otherwise, iter_changes_result is None unless changed is
 
3288
            True.
2985
3289
        """
2986
3290
        if self.source_index is None:
2987
3291
            source_details = DirState.NULL_PARENT_DETAILS
3055
3359
                    if source_minikind != 'f':
3056
3360
                        content_change = True
3057
3361
                    else:
3058
 
                        # If the size is the same, check the sha:
3059
 
                        if target_details[2] == source_details[2]:
3060
 
                            if link_or_sha1 is None:
3061
 
                                # Stat cache miss:
3062
 
                                statvalue, link_or_sha1 = \
3063
 
                                    self.state._sha1_provider.stat_and_sha1(
3064
 
                                    path_info[4])
3065
 
                                self.state._observed_sha1(entry, link_or_sha1,
3066
 
                                    statvalue)
3067
 
                            content_change = (link_or_sha1 != source_details[1])
3068
 
                        else:
3069
 
                            # Size changed, so must be different
3070
 
                            content_change = True
 
3362
                        # Check the sha. We can't just rely on the size as
 
3363
                        # content filtering may mean differ sizes actually
 
3364
                        # map to the same content
 
3365
                        if link_or_sha1 is None:
 
3366
                            # Stat cache miss:
 
3367
                            statvalue, link_or_sha1 = \
 
3368
                                self.state._sha1_provider.stat_and_sha1(
 
3369
                                path_info[4])
 
3370
                            self.state._observed_sha1(entry, link_or_sha1,
 
3371
                                statvalue)
 
3372
                        content_change = (link_or_sha1 != source_details[1])
3071
3373
                    # Target details is updated at update_entry time
3072
3374
                    if self.use_filesystem_for_exec:
3073
3375
                        # We don't need S_ISREG here, because we are sure
3088
3390
                        content_change = False
3089
3391
                    target_exec = False
3090
3392
                else:
3091
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
3393
                    if path is None:
 
3394
                        path = pathjoin(old_dirname, old_basename)
 
3395
                    raise errors.BadFileKindError(path, path_info[2])
3092
3396
            if source_minikind == 'd':
3093
3397
                if path is None:
3094
3398
                    old_path = path = pathjoin(old_dirname, old_basename)
3095
3399
                self.old_dirname_to_file_id[old_path] = file_id
3096
3400
            # parent id is the entry for the path in the target tree
3097
 
            if old_dirname == self.last_source_parent[0]:
 
3401
            if old_basename and old_dirname == self.last_source_parent[0]:
3098
3402
                source_parent_id = self.last_source_parent[1]
3099
3403
            else:
3100
3404
                try:
3110
3414
                    self.last_source_parent[0] = old_dirname
3111
3415
                    self.last_source_parent[1] = source_parent_id
3112
3416
            new_dirname = entry[0][0]
3113
 
            if new_dirname == self.last_target_parent[0]:
 
3417
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
3114
3418
                target_parent_id = self.last_target_parent[1]
3115
3419
            else:
3116
3420
                try:
3133
3437
                    self.last_target_parent[1] = target_parent_id
3134
3438
 
3135
3439
            source_exec = source_details[3]
3136
 
            if (self.include_unchanged
3137
 
                or content_change
 
3440
            changed = (content_change
3138
3441
                or source_parent_id != target_parent_id
3139
3442
                or old_basename != entry[0][1]
3140
3443
                or source_exec != target_exec
3141
 
                ):
 
3444
                )
 
3445
            if not changed and not self.include_unchanged:
 
3446
                return None, False
 
3447
            else:
3142
3448
                if old_path is None:
3143
3449
                    old_path = path = pathjoin(old_dirname, old_basename)
3144
3450
                    old_path_u = self.utf8_decode(old_path)[0]
3157
3463
                       (source_parent_id, target_parent_id),
3158
3464
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
3159
3465
                       (source_kind, target_kind),
3160
 
                       (source_exec, target_exec))
3161
 
            else:
3162
 
                return self.uninteresting
 
3466
                       (source_exec, target_exec)), changed
3163
3467
        elif source_minikind in 'a' and target_minikind in 'fdlt':
3164
3468
            # looks like a new file
3165
3469
            path = pathjoin(entry[0][0], entry[0][1])
3186
3490
                       (None, parent_id),
3187
3491
                       (None, self.utf8_decode(entry[0][1])[0]),
3188
3492
                       (None, path_info[2]),
3189
 
                       (None, target_exec))
 
3493
                       (None, target_exec)), True
3190
3494
            else:
3191
3495
                # Its a missing file, report it as such.
3192
3496
                return (entry[0][2],
3196
3500
                       (None, parent_id),
3197
3501
                       (None, self.utf8_decode(entry[0][1])[0]),
3198
3502
                       (None, None),
3199
 
                       (None, False))
 
3503
                       (None, False)), True
3200
3504
        elif source_minikind in 'fdlt' and target_minikind in 'a':
3201
3505
            # unversioned, possibly, or possibly not deleted: we dont care.
3202
3506
            # if its still on disk, *and* theres no other entry at this
3214
3518
                   (parent_id, None),
3215
3519
                   (self.utf8_decode(entry[0][1])[0], None),
3216
3520
                   (DirState._minikind_to_kind[source_minikind], None),
3217
 
                   (source_details[3], None))
 
3521
                   (source_details[3], None)), True
3218
3522
        elif source_minikind in 'fdlt' and target_minikind in 'r':
3219
3523
            # a rename; could be a true rename, or a rename inherited from
3220
3524
            # a renamed parent. TODO: handle this efficiently. Its not
3232
3536
                "source_minikind=%r, target_minikind=%r"
3233
3537
                % (source_minikind, target_minikind))
3234
3538
            ## import pdb;pdb.set_trace()
3235
 
        return None
 
3539
        return None, None
3236
3540
 
3237
3541
    def __iter__(self):
3238
3542
        return self
3239
3543
 
 
3544
    def _gather_result_for_consistency(self, result):
 
3545
        """Check a result we will yield to make sure we are consistent later.
 
3546
        
 
3547
        This gathers result's parents into a set to output later.
 
3548
 
 
3549
        :param result: A result tuple.
 
3550
        """
 
3551
        if not self.partial or not result[0]:
 
3552
            return
 
3553
        self.seen_ids.add(result[0])
 
3554
        new_path = result[1][1]
 
3555
        if new_path:
 
3556
            # Not the root and not a delete: queue up the parents of the path.
 
3557
            self.search_specific_file_parents.update(
 
3558
                osutils.parent_directories(new_path.encode('utf8')))
 
3559
            # Add the root directory which parent_directories does not
 
3560
            # provide.
 
3561
            self.search_specific_file_parents.add('')
 
3562
 
3240
3563
    def iter_changes(self):
3241
3564
        """Iterate over the changes."""
3242
3565
        utf8_decode = cache_utf8._utf8_decode
3243
3566
        _cmp_by_dirs = cmp_by_dirs
3244
3567
        _process_entry = self._process_entry
3245
 
        uninteresting = self.uninteresting
3246
3568
        search_specific_files = self.search_specific_files
3247
3569
        searched_specific_files = self.searched_specific_files
3248
3570
        splitpath = osutils.splitpath
3318
3640
                continue
3319
3641
            path_handled = False
3320
3642
            for entry in root_entries:
3321
 
                result = _process_entry(entry, root_dir_info)
3322
 
                if result is not None:
 
3643
                result, changed = _process_entry(entry, root_dir_info)
 
3644
                if changed is not None:
3323
3645
                    path_handled = True
3324
 
                    if result is not uninteresting:
 
3646
                    if changed:
 
3647
                        self._gather_result_for_consistency(result)
 
3648
                    if changed or self.include_unchanged:
3325
3649
                        yield result
3326
3650
            if self.want_unversioned and not path_handled and root_dir_info:
3327
3651
                new_executable = bool(
3437
3761
                        for current_entry in current_block[1]:
3438
3762
                            # entry referring to file not present on disk.
3439
3763
                            # advance the entry only, after processing.
3440
 
                            result = _process_entry(current_entry, None)
3441
 
                            if result is not None:
3442
 
                                if result is not uninteresting:
 
3764
                            result, changed = _process_entry(current_entry, None)
 
3765
                            if changed is not None:
 
3766
                                if changed:
 
3767
                                    self._gather_result_for_consistency(result)
 
3768
                                if changed or self.include_unchanged:
3443
3769
                                    yield result
3444
3770
                        block_index +=1
3445
3771
                        if (block_index < len(self.state._dirblocks) and
3475
3801
                        pass
3476
3802
                    elif current_path_info is None:
3477
3803
                        # no path is fine: the per entry code will handle it.
3478
 
                        result = _process_entry(current_entry, current_path_info)
3479
 
                        if result is not None:
3480
 
                            if result is not uninteresting:
 
3804
                        result, changed = _process_entry(current_entry, current_path_info)
 
3805
                        if changed is not None:
 
3806
                            if changed:
 
3807
                                self._gather_result_for_consistency(result)
 
3808
                            if changed or self.include_unchanged:
3481
3809
                                yield result
3482
3810
                    elif (current_entry[0][1] != current_path_info[1]
3483
3811
                          or current_entry[1][self.target_index][0] in 'ar'):
3496
3824
                        else:
3497
3825
                            # entry referring to file not present on disk.
3498
3826
                            # advance the entry only, after processing.
3499
 
                            result = _process_entry(current_entry, None)
3500
 
                            if result is not None:
3501
 
                                if result is not uninteresting:
 
3827
                            result, changed = _process_entry(current_entry, None)
 
3828
                            if changed is not None:
 
3829
                                if changed:
 
3830
                                    self._gather_result_for_consistency(result)
 
3831
                                if changed or self.include_unchanged:
3502
3832
                                    yield result
3503
3833
                            advance_path = False
3504
3834
                    else:
3505
 
                        result = _process_entry(current_entry, current_path_info)
3506
 
                        if result is not None:
 
3835
                        result, changed = _process_entry(current_entry, current_path_info)
 
3836
                        if changed is not None:
3507
3837
                            path_handled = True
3508
 
                            if result is not uninteresting:
 
3838
                            if changed:
 
3839
                                self._gather_result_for_consistency(result)
 
3840
                            if changed or self.include_unchanged:
3509
3841
                                yield result
3510
3842
                    if advance_entry and current_entry is not None:
3511
3843
                        entry_index += 1
3570
3902
                        current_dir_info = dir_iterator.next()
3571
3903
                    except StopIteration:
3572
3904
                        current_dir_info = None
3573
 
_process_entry = ProcessEntryPython
 
3905
        for result in self._iter_specific_file_parents():
 
3906
            yield result
 
3907
 
 
3908
    def _iter_specific_file_parents(self):
 
3909
        """Iter over the specific file parents."""
 
3910
        while self.search_specific_file_parents:
 
3911
            # Process the parent directories for the paths we were iterating.
 
3912
            # Even in extremely large trees this should be modest, so currently
 
3913
            # no attempt is made to optimise.
 
3914
            path_utf8 = self.search_specific_file_parents.pop()
 
3915
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
3916
                # We've examined this path.
 
3917
                continue
 
3918
            if path_utf8 in self.searched_exact_paths:
 
3919
                # We've examined this path.
 
3920
                continue
 
3921
            path_entries = self.state._entries_for_path(path_utf8)
 
3922
            # We need either one or two entries. If the path in
 
3923
            # self.target_index has moved (so the entry in source_index is in
 
3924
            # 'ar') then we need to also look for the entry for this path in
 
3925
            # self.source_index, to output the appropriate delete-or-rename.
 
3926
            selected_entries = []
 
3927
            found_item = False
 
3928
            for candidate_entry in path_entries:
 
3929
                # Find entries present in target at this path:
 
3930
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
3931
                    found_item = True
 
3932
                    selected_entries.append(candidate_entry)
 
3933
                # Find entries present in source at this path:
 
3934
                elif (self.source_index is not None and
 
3935
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
3936
                    found_item = True
 
3937
                    if candidate_entry[1][self.target_index][0] == 'a':
 
3938
                        # Deleted, emit it here.
 
3939
                        selected_entries.append(candidate_entry)
 
3940
                    else:
 
3941
                        # renamed, emit it when we process the directory it
 
3942
                        # ended up at.
 
3943
                        self.search_specific_file_parents.add(
 
3944
                            candidate_entry[1][self.target_index][1])
 
3945
            if not found_item:
 
3946
                raise AssertionError(
 
3947
                    "Missing entry for specific path parent %r, %r" % (
 
3948
                    path_utf8, path_entries))
 
3949
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
3950
            for entry in selected_entries:
 
3951
                if entry[0][2] in self.seen_ids:
 
3952
                    continue
 
3953
                result, changed = self._process_entry(entry, path_info)
 
3954
                if changed is None:
 
3955
                    raise AssertionError(
 
3956
                        "Got entry<->path mismatch for specific path "
 
3957
                        "%r entry %r path_info %r " % (
 
3958
                        path_utf8, entry, path_info))
 
3959
                # Only include changes - we're outside the users requested
 
3960
                # expansion.
 
3961
                if changed:
 
3962
                    self._gather_result_for_consistency(result)
 
3963
                    if (result[6][0] == 'directory' and
 
3964
                        result[6][1] != 'directory'):
 
3965
                        # This stopped being a directory, the old children have
 
3966
                        # to be included.
 
3967
                        if entry[1][self.source_index][0] == 'r':
 
3968
                            # renamed, take the source path
 
3969
                            entry_path_utf8 = entry[1][self.source_index][1]
 
3970
                        else:
 
3971
                            entry_path_utf8 = path_utf8
 
3972
                        initial_key = (entry_path_utf8, '', '')
 
3973
                        block_index, _ = self.state._find_block_index_from_key(
 
3974
                            initial_key)
 
3975
                        if block_index == 0:
 
3976
                            # The children of the root are in block index 1.
 
3977
                            block_index +=1
 
3978
                        current_block = None
 
3979
                        if block_index < len(self.state._dirblocks):
 
3980
                            current_block = self.state._dirblocks[block_index]
 
3981
                            if not osutils.is_inside(
 
3982
                                entry_path_utf8, current_block[0]):
 
3983
                                # No entries for this directory at all.
 
3984
                                current_block = None
 
3985
                        if current_block is not None:
 
3986
                            for entry in current_block[1]:
 
3987
                                if entry[1][self.source_index][0] in 'ar':
 
3988
                                    # Not in the source tree, so doesn't have to be
 
3989
                                    # included.
 
3990
                                    continue
 
3991
                                # Path of the entry itself.
 
3992
 
 
3993
                                self.search_specific_file_parents.add(
 
3994
                                    osutils.pathjoin(*entry[0][:2]))
 
3995
                if changed or self.include_unchanged:
 
3996
                    yield result
 
3997
            self.searched_exact_paths.add(path_utf8)
 
3998
 
 
3999
    def _path_info(self, utf8_path, unicode_path):
 
4000
        """Generate path_info for unicode_path.
 
4001
 
 
4002
        :return: None if unicode_path does not exist, or a path_info tuple.
 
4003
        """
 
4004
        abspath = self.tree.abspath(unicode_path)
 
4005
        try:
 
4006
            stat = os.lstat(abspath)
 
4007
        except OSError, e:
 
4008
            if e.errno == errno.ENOENT:
 
4009
                # the path does not exist.
 
4010
                return None
 
4011
            else:
 
4012
                raise
 
4013
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
4014
        dir_info = (utf8_path, utf8_basename,
 
4015
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
4016
            abspath)
 
4017
        if dir_info[2] == 'directory':
 
4018
            if self.tree._directory_is_tree_reference(
 
4019
                unicode_path):
 
4020
                self.root_dir_info = self.root_dir_info[:2] + \
 
4021
                    ('tree-reference',) + self.root_dir_info[3:]
 
4022
        return dir_info
3574
4023
 
3575
4024
 
3576
4025
# Try to load the compiled form if possible
3577
4026
try:
3578
 
    from bzrlib._dirstate_helpers_c import (
3579
 
        _read_dirblocks_c as _read_dirblocks,
3580
 
        bisect_dirblock_c as bisect_dirblock,
3581
 
        _bisect_path_left_c as _bisect_path_left,
3582
 
        _bisect_path_right_c as _bisect_path_right,
3583
 
        cmp_by_dirs_c as cmp_by_dirs,
 
4027
    from bzrlib._dirstate_helpers_pyx import (
 
4028
        _read_dirblocks,
 
4029
        bisect_dirblock,
 
4030
        _bisect_path_left,
 
4031
        _bisect_path_right,
 
4032
        cmp_by_dirs,
3584
4033
        ProcessEntryC as _process_entry,
3585
4034
        update_entry as update_entry,
3586
4035
        )
3587
 
except ImportError:
 
4036
except ImportError, e:
 
4037
    osutils.failed_to_load_extension(e)
3588
4038
    from bzrlib._dirstate_helpers_py import (
3589
 
        _read_dirblocks_py as _read_dirblocks,
3590
 
        bisect_dirblock_py as bisect_dirblock,
3591
 
        _bisect_path_left_py as _bisect_path_left,
3592
 
        _bisect_path_right_py as _bisect_path_right,
3593
 
        cmp_by_dirs_py as cmp_by_dirs,
 
4039
        _read_dirblocks,
 
4040
        bisect_dirblock,
 
4041
        _bisect_path_left,
 
4042
        _bisect_path_right,
 
4043
        cmp_by_dirs,
3594
4044
        )
 
4045
    # FIXME: It would be nice to be able to track moved lines so that the
 
4046
    # corresponding python code can be moved to the _dirstate_helpers_py
 
4047
    # module. I don't want to break the history for this important piece of
 
4048
    # code so I left the code here -- vila 20090622
 
4049
    update_entry = py_update_entry
 
4050
    _process_entry = ProcessEntryPython