~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2010-09-29 22:03:03 UTC
  • mfrom: (5416.2.6 jam-integration)
  • Revision ID: pqm@pqm.ubuntu.com-20100929220303-cr95h8iwtggco721
(mbp) Add 'break-lock --force'

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2006, 2007, 2008 Canonical Ltd
 
1
# Copyright (C) 2006-2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
204
204
import bisect
205
205
import binascii
206
206
import errno
 
207
import operator
207
208
import os
208
209
from stat import S_IEXEC
209
210
import stat
219
220
    inventory,
220
221
    lock,
221
222
    osutils,
 
223
    static_tuple,
222
224
    trace,
223
225
    )
224
226
 
547
549
           self._ensure_block(block_index, entry_index, utf8path)
548
550
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
549
551
        if self._id_index:
550
 
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)
 
552
            self._add_to_id_index(self._id_index, entry_key)
551
553
 
552
554
    def _bisect(self, paths):
553
555
        """Bisect through the disk structure for specific rows.
1277
1279
    def update_by_delta(self, delta):
1278
1280
        """Apply an inventory delta to the dirstate for tree 0
1279
1281
 
 
1282
        This is the workhorse for apply_inventory_delta in dirstate based
 
1283
        trees.
 
1284
 
1280
1285
        :param delta: An inventory delta.  See Inventory.apply_delta for
1281
1286
            details.
1282
1287
        """
1283
1288
        self._read_dirblocks_if_needed()
 
1289
        encode = cache_utf8.encode
1284
1290
        insertions = {}
1285
1291
        removals = {}
1286
 
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
 
1292
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1293
        # items or items placed under files/links/tree-references. We get
 
1294
        # references from every item in the delta that is not a deletion and
 
1295
        # is not itself the root.
 
1296
        parents = set()
 
1297
        # Added ids must not be in the dirstate already. This set holds those
 
1298
        # ids.
 
1299
        new_ids = set()
 
1300
        # This loop transforms the delta to single atomic operations that can
 
1301
        # be executed and validated.
 
1302
        for old_path, new_path, file_id, inv_entry in sorted(
 
1303
            inventory._check_delta_unique_old_paths(
 
1304
            inventory._check_delta_unique_new_paths(
 
1305
            inventory._check_delta_ids_match_entry(
 
1306
            inventory._check_delta_ids_are_valid(
 
1307
            inventory._check_delta_new_path_entry_both_or_None(delta))))),
 
1308
            reverse=True):
1287
1309
            if (file_id in insertions) or (file_id in removals):
1288
 
                raise AssertionError("repeated file id in delta %r" % (file_id,))
 
1310
                raise errors.InconsistentDelta(old_path or new_path, file_id,
 
1311
                    "repeated file_id")
1289
1312
            if old_path is not None:
1290
1313
                old_path = old_path.encode('utf-8')
1291
1314
                removals[file_id] = old_path
 
1315
            else:
 
1316
                new_ids.add(file_id)
1292
1317
            if new_path is not None:
 
1318
                if inv_entry is None:
 
1319
                    raise errors.InconsistentDelta(new_path, file_id,
 
1320
                        "new_path with no entry")
1293
1321
                new_path = new_path.encode('utf-8')
1294
 
                dirname, basename = osutils.split(new_path)
1295
 
                key = (dirname, basename, file_id)
 
1322
                dirname_utf8, basename = osutils.split(new_path)
 
1323
                if basename:
 
1324
                    parents.add((dirname_utf8, inv_entry.parent_id))
 
1325
                key = (dirname_utf8, basename, file_id)
1296
1326
                minikind = DirState._kind_to_minikind[inv_entry.kind]
1297
1327
                if minikind == 't':
1298
 
                    fingerprint = inv_entry.reference_revision
 
1328
                    fingerprint = inv_entry.reference_revision or ''
1299
1329
                else:
1300
1330
                    fingerprint = ''
1301
1331
                insertions[file_id] = (key, minikind, inv_entry.executable,
1310
1340
                    minikind = child[1][0][0]
1311
1341
                    fingerprint = child[1][0][4]
1312
1342
                    executable = child[1][0][3]
1313
 
                    old_child_path = osutils.pathjoin(child[0][0],
1314
 
                                                      child[0][1])
 
1343
                    old_child_path = osutils.pathjoin(child_dirname,
 
1344
                                                      child_basename)
1315
1345
                    removals[child[0][2]] = old_child_path
1316
1346
                    child_suffix = child_dirname[len(old_path):]
1317
1347
                    new_child_dirname = (new_path + child_suffix)
1318
1348
                    key = (new_child_dirname, child_basename, child[0][2])
1319
 
                    new_child_path = os.path.join(new_child_dirname,
1320
 
                                                  child_basename)
 
1349
                    new_child_path = osutils.pathjoin(new_child_dirname,
 
1350
                                                      child_basename)
1321
1351
                    insertions[child[0][2]] = (key, minikind, executable,
1322
1352
                                               fingerprint, new_child_path)
1323
 
        self._apply_removals(removals.values())
1324
 
        self._apply_insertions(insertions.values())
 
1353
        self._check_delta_ids_absent(new_ids, delta, 0)
 
1354
        try:
 
1355
            self._apply_removals(removals.iteritems())
 
1356
            self._apply_insertions(insertions.values())
 
1357
            # Validate parents
 
1358
            self._after_delta_check_parents(parents, 0)
 
1359
        except errors.BzrError, e:
 
1360
            self._changes_aborted = True
 
1361
            if 'integrity error' not in str(e):
 
1362
                raise
 
1363
            # _get_entry raises BzrError when a request is inconsistent; we
 
1364
            # want such errors to be shown as InconsistentDelta - and that 
 
1365
            # fits the behaviour we trigger.
 
1366
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1325
1367
 
1326
1368
    def _apply_removals(self, removals):
1327
 
        for path in sorted(removals, reverse=True):
 
1369
        for file_id, path in sorted(removals, reverse=True,
 
1370
            key=operator.itemgetter(1)):
1328
1371
            dirname, basename = osutils.split(path)
1329
1372
            block_i, entry_i, d_present, f_present = \
1330
1373
                self._get_block_entry_index(dirname, basename, 0)
1331
 
            entry = self._dirblocks[block_i][1][entry_i]
 
1374
            try:
 
1375
                entry = self._dirblocks[block_i][1][entry_i]
 
1376
            except IndexError:
 
1377
                self._changes_aborted = True
 
1378
                raise errors.InconsistentDelta(path, file_id,
 
1379
                    "Wrong path for old path.")
 
1380
            if not f_present or entry[1][0][0] in 'ar':
 
1381
                self._changes_aborted = True
 
1382
                raise errors.InconsistentDelta(path, file_id,
 
1383
                    "Wrong path for old path.")
 
1384
            if file_id != entry[0][2]:
 
1385
                self._changes_aborted = True
 
1386
                raise errors.InconsistentDelta(path, file_id,
 
1387
                    "Attempt to remove path has wrong id - found %r."
 
1388
                    % entry[0][2])
1332
1389
            self._make_absent(entry)
1333
1390
            # See if we have a malformed delta: deleting a directory must not
1334
1391
            # leave crud behind. This increases the number of bisects needed
1342
1399
                # be due to it being in a parent tree, or a corrupt delta.
1343
1400
                for child_entry in self._dirblocks[block_i][1]:
1344
1401
                    if child_entry[1][0][0] not in ('r', 'a'):
 
1402
                        self._changes_aborted = True
1345
1403
                        raise errors.InconsistentDelta(path, entry[0][2],
1346
1404
                            "The file id was deleted but its children were "
1347
1405
                            "not deleted.")
1348
1406
 
1349
1407
    def _apply_insertions(self, adds):
1350
 
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1351
 
            self.update_minimal(key, minikind, executable, fingerprint,
1352
 
                                path_utf8=path_utf8)
 
1408
        try:
 
1409
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
 
1410
                self.update_minimal(key, minikind, executable, fingerprint,
 
1411
                                    path_utf8=path_utf8)
 
1412
        except errors.NotVersionedError:
 
1413
            self._changes_aborted = True
 
1414
            raise errors.InconsistentDelta(path_utf8.decode('utf8'), key[2],
 
1415
                "Missing parent")
1353
1416
 
1354
1417
    def update_basis_by_delta(self, delta, new_revid):
1355
1418
        """Update the parents of this tree after a commit.
1399
1462
        # At the same time, to reduce interface friction we convert the input
1400
1463
        # inventory entries to dirstate.
1401
1464
        root_only = ('', '')
 
1465
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1466
        # items or items placed under files/links/tree-references. We get
 
1467
        # references from every item in the delta that is not a deletion and
 
1468
        # is not itself the root.
 
1469
        parents = set()
 
1470
        # Added ids must not be in the dirstate already. This set holds those
 
1471
        # ids.
 
1472
        new_ids = set()
1402
1473
        for old_path, new_path, file_id, inv_entry in delta:
 
1474
            if inv_entry is not None and file_id != inv_entry.file_id:
 
1475
                raise errors.InconsistentDelta(new_path, file_id,
 
1476
                    "mismatched entry file_id %r" % inv_entry)
 
1477
            if new_path is not None:
 
1478
                if inv_entry is None:
 
1479
                    raise errors.InconsistentDelta(new_path, file_id,
 
1480
                        "new_path with no entry")
 
1481
                new_path_utf8 = encode(new_path)
 
1482
                # note the parent for validation
 
1483
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
 
1484
                if basename_utf8:
 
1485
                    parents.add((dirname_utf8, inv_entry.parent_id))
1403
1486
            if old_path is None:
1404
1487
                adds.append((None, encode(new_path), file_id,
1405
1488
                    inv_to_entry(inv_entry), True))
 
1489
                new_ids.add(file_id)
1406
1490
            elif new_path is None:
1407
1491
                deletes.append((encode(old_path), None, file_id, None, True))
1408
1492
            elif (old_path, new_path) != root_only:
1420
1504
                # for 'r' items on every pass.
1421
1505
                self._update_basis_apply_deletes(deletes)
1422
1506
                deletes = []
1423
 
                new_path_utf8 = encode(new_path)
1424
1507
                # Split into an add/delete pair recursively.
1425
1508
                adds.append((None, new_path_utf8, file_id,
1426
1509
                    inv_to_entry(inv_entry), False))
1452
1535
                # of everything.
1453
1536
                changes.append((encode(old_path), encode(new_path), file_id,
1454
1537
                    inv_to_entry(inv_entry)))
1455
 
 
1456
 
        # Finish expunging deletes/first half of renames.
1457
 
        self._update_basis_apply_deletes(deletes)
1458
 
        # Reinstate second half of renames and new paths.
1459
 
        self._update_basis_apply_adds(adds)
1460
 
        # Apply in-situ changes.
1461
 
        self._update_basis_apply_changes(changes)
 
1538
        self._check_delta_ids_absent(new_ids, delta, 1)
 
1539
        try:
 
1540
            # Finish expunging deletes/first half of renames.
 
1541
            self._update_basis_apply_deletes(deletes)
 
1542
            # Reinstate second half of renames and new paths.
 
1543
            self._update_basis_apply_adds(adds)
 
1544
            # Apply in-situ changes.
 
1545
            self._update_basis_apply_changes(changes)
 
1546
            # Validate parents
 
1547
            self._after_delta_check_parents(parents, 1)
 
1548
        except errors.BzrError, e:
 
1549
            self._changes_aborted = True
 
1550
            if 'integrity error' not in str(e):
 
1551
                raise
 
1552
            # _get_entry raises BzrError when a request is inconsistent; we
 
1553
            # want such errors to be shown as InconsistentDelta - and that 
 
1554
            # fits the behaviour we trigger. Part of this is driven by dirstate
 
1555
            # only supporting deltas that turn the basis into a closer fit to
 
1556
            # the active tree.
 
1557
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1462
1558
 
1463
1559
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1464
1560
        self._header_state = DirState.IN_MEMORY_MODIFIED
1465
1561
        self._id_index = None
1466
1562
        return
1467
1563
 
 
1564
    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
 
1565
        """Check that none of the file_ids in new_ids are present in a tree."""
 
1566
        if not new_ids:
 
1567
            return
 
1568
        id_index = self._get_id_index()
 
1569
        for file_id in new_ids:
 
1570
            for key in id_index.get(file_id, ()):
 
1571
                block_i, entry_i, d_present, f_present = \
 
1572
                    self._get_block_entry_index(key[0], key[1], tree_index)
 
1573
                if not f_present:
 
1574
                    # In a different tree
 
1575
                    continue
 
1576
                entry = self._dirblocks[block_i][1][entry_i]
 
1577
                if entry[0][2] != file_id:
 
1578
                    # Different file_id, so not what we want.
 
1579
                    continue
 
1580
                # NB: No changes made before this helper is called, so no need
 
1581
                # to set the _changes_aborted flag.
 
1582
                raise errors.InconsistentDelta(
 
1583
                    ("%s/%s" % key[0:2]).decode('utf8'), file_id,
 
1584
                    "This file_id is new in the delta but already present in "
 
1585
                    "the target")
 
1586
 
1468
1587
    def _update_basis_apply_adds(self, adds):
1469
1588
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.
1470
1589
 
1535
1654
        null = DirState.NULL_PARENT_DETAILS
1536
1655
        for old_path, new_path, file_id, _, real_delete in deletes:
1537
1656
            if real_delete != (new_path is None):
 
1657
                self._changes_aborted = True
1538
1658
                raise AssertionError("bad delete delta")
1539
1659
            # the entry for this file_id must be in tree 1.
1540
1660
            dirname, basename = osutils.split(old_path)
1573
1693
                    # it is being resurrected here, so blank it out temporarily.
1574
1694
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1575
1695
 
 
1696
    def _after_delta_check_parents(self, parents, index):
 
1697
        """Check that parents required by the delta are all intact.
 
1698
        
 
1699
        :param parents: An iterable of (path_utf8, file_id) tuples which are
 
1700
            required to be present in tree 'index' at path_utf8 with id file_id
 
1701
            and be a directory.
 
1702
        :param index: The column in the dirstate to check for parents in.
 
1703
        """
 
1704
        for dirname_utf8, file_id in parents:
 
1705
            # Get the entry - this ensures that file_id, dirname_utf8 exists and
 
1706
            # has the right file id.
 
1707
            entry = self._get_entry(index, file_id, dirname_utf8)
 
1708
            if entry[1] is None:
 
1709
                self._changes_aborted = True
 
1710
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1711
                    file_id, "This parent is not present.")
 
1712
            # Parents of things must be directories
 
1713
            if entry[1][index][0] != 'd':
 
1714
                self._changes_aborted = True
 
1715
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1716
                    file_id, "This parent is not a directory.")
 
1717
 
1576
1718
    def _observed_sha1(self, entry, sha1, stat_value,
1577
1719
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1578
1720
        """Note the sha1 of a file.
1821
1963
        self._read_dirblocks_if_needed()
1822
1964
        if path_utf8 is not None:
1823
1965
            if type(path_utf8) is not str:
1824
 
                raise AssertionError('path_utf8 is not a str: %s %s'
 
1966
                raise errors.BzrError('path_utf8 is not a str: %s %r'
1825
1967
                    % (type(path_utf8), path_utf8))
1826
1968
            # path lookups are faster
1827
1969
            dirname, basename = osutils.split(path_utf8)
1839
1981
                                          ' tree_index, file_id and path')
1840
1982
            return entry
1841
1983
        else:
1842
 
            possible_keys = self._get_id_index().get(fileid_utf8, None)
 
1984
            possible_keys = self._get_id_index().get(fileid_utf8, ())
1843
1985
            if not possible_keys:
1844
1986
                return None, None
1845
1987
            for key in possible_keys:
1856
1998
                entry_index, present = self._find_entry_index(key, block)
1857
1999
                if present:
1858
2000
                    entry = self._dirblocks[block_index][1][entry_index]
 
2001
                    # TODO: We might want to assert that entry[0][2] ==
 
2002
                    #       fileid_utf8.
1859
2003
                    if entry[1][tree_index][0] in 'fdlt':
1860
2004
                        # this is the result we are looking for: the
1861
2005
                        # real home of this file_id in this tree.
2000
2144
                yield entry
2001
2145
 
2002
2146
    def _get_id_index(self):
2003
 
        """Get an id index of self._dirblocks."""
 
2147
        """Get an id index of self._dirblocks.
 
2148
        
 
2149
        This maps from file_id => [(directory, name, file_id)] entries where
 
2150
        that file_id appears in one of the trees.
 
2151
        """
2004
2152
        if self._id_index is None:
2005
2153
            id_index = {}
2006
2154
            for key, tree_details in self._iter_entries():
2007
 
                id_index.setdefault(key[2], set()).add(key)
 
2155
                self._add_to_id_index(id_index, key)
2008
2156
            self._id_index = id_index
2009
2157
        return self._id_index
2010
2158
 
 
2159
    def _add_to_id_index(self, id_index, entry_key):
 
2160
        """Add this entry to the _id_index mapping."""
 
2161
        # This code used to use a set for every entry in the id_index. However,
 
2162
        # it is *rare* to have more than one entry. So a set is a large
 
2163
        # overkill. And even when we do, we won't ever have more than the
 
2164
        # number of parent trees. Which is still a small number (rarely >2). As
 
2165
        # such, we use a simple tuple, and do our own uniqueness checks. While
 
2166
        # the 'in' check is O(N) since N is nicely bounded it shouldn't ever
 
2167
        # cause quadratic failure.
 
2168
        # TODO: This should use StaticTuple
 
2169
        file_id = entry_key[2]
 
2170
        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
 
2171
        if file_id not in id_index:
 
2172
            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
 
2173
        else:
 
2174
            entry_keys = id_index[file_id]
 
2175
            if entry_key not in entry_keys:
 
2176
                id_index[file_id] = entry_keys + (entry_key,)
 
2177
 
 
2178
    def _remove_from_id_index(self, id_index, entry_key):
 
2179
        """Remove this entry from the _id_index mapping.
 
2180
 
 
2181
        It is a programming error to call this when the entry_key is not
 
2182
        already present.
 
2183
        """
 
2184
        file_id = entry_key[2]
 
2185
        entry_keys = list(id_index[file_id])
 
2186
        entry_keys.remove(entry_key)
 
2187
        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
 
2188
 
2011
2189
    def _get_output_lines(self, lines):
2012
2190
        """Format lines for final output.
2013
2191
 
2213
2391
        self.update_minimal(('', '', new_id), 'd',
2214
2392
            path_utf8='', packed_stat=entry[1][0][4])
2215
2393
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2216
 
        if self._id_index is not None:
2217
 
            self._id_index.setdefault(new_id, set()).add(entry[0])
2218
2394
 
2219
2395
    def set_parent_trees(self, trees, ghosts):
2220
2396
        """Set the parent trees for the dirstate.
2273
2449
                continue
2274
2450
            by_path[entry[0]] = [entry[1][0]] + \
2275
2451
                [DirState.NULL_PARENT_DETAILS] * parent_count
2276
 
            id_index[entry[0][2]] = set([entry[0]])
 
2452
            # TODO: Possibly inline this, since we know it isn't present yet
 
2453
            #       id_index[entry[0][2]] = (entry[0],)
 
2454
            self._add_to_id_index(id_index, entry[0])
2277
2455
 
2278
2456
        # now the parent trees:
2279
2457
        for tree_index, tree in enumerate(parent_trees):
2301
2479
                new_entry_key = (dirname, basename, file_id)
2302
2480
                # tree index consistency: All other paths for this id in this tree
2303
2481
                # index must point to the correct path.
2304
 
                for entry_key in id_index.setdefault(file_id, set()):
 
2482
                for entry_key in id_index.get(file_id, ()):
2305
2483
                    # TODO:PROFILING: It might be faster to just update
2306
2484
                    # rather than checking if we need to, and then overwrite
2307
2485
                    # the one we are located at.
2313
2491
                        by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2314
2492
                # by path consistency: Insert into an existing path record (trivial), or
2315
2493
                # add a new one with relocation pointers for the other tree indexes.
2316
 
                if new_entry_key in id_index[file_id]:
 
2494
                entry_keys = id_index.get(file_id, ())
 
2495
                if new_entry_key in entry_keys:
2317
2496
                    # there is already an entry where this data belongs, just insert it.
2318
2497
                    by_path[new_entry_key][tree_index] = \
2319
2498
                        self._inv_entry_to_details(entry)
2324
2503
                    new_details = []
2325
2504
                    for lookup_index in xrange(tree_index):
2326
2505
                        # boundary case: this is the first occurrence of file_id
2327
 
                        # so there are no id_indexs, possibly take this out of
 
2506
                        # so there are no id_indexes, possibly take this out of
2328
2507
                        # the loop?
2329
 
                        if not len(id_index[file_id]):
 
2508
                        if not len(entry_keys):
2330
2509
                            new_details.append(DirState.NULL_PARENT_DETAILS)
2331
2510
                        else:
2332
2511
                            # grab any one entry, use it to find the right path.
2333
2512
                            # TODO: optimise this to reduce memory use in highly
2334
2513
                            # fragmented situations by reusing the relocation
2335
2514
                            # records.
2336
 
                            a_key = iter(id_index[file_id]).next()
 
2515
                            a_key = iter(entry_keys).next()
2337
2516
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
2338
2517
                                # it's a pointer or missing statement, use it as is.
2339
2518
                                new_details.append(by_path[a_key][lookup_index])
2344
2523
                    new_details.append(self._inv_entry_to_details(entry))
2345
2524
                    new_details.extend(new_location_suffix)
2346
2525
                    by_path[new_entry_key] = new_details
2347
 
                    id_index[file_id].add(new_entry_key)
 
2526
                    self._add_to_id_index(id_index, new_entry_key)
2348
2527
        # --- end generation of full tree mappings
2349
2528
 
2350
2529
        # sort and output all the entries
2379
2558
        if 'evil' in debug.debug_flags:
2380
2559
            trace.mutter_callsite(1,
2381
2560
                "set_state_from_inventory called; please mutate the tree instead")
 
2561
        tracing = 'dirstate' in debug.debug_flags
 
2562
        if tracing:
 
2563
            trace.mutter("set_state_from_inventory trace:")
2382
2564
        self._read_dirblocks_if_needed()
2383
2565
        # sketch:
2384
2566
        # Two iterators: current data and new data, both in dirblock order.
2393
2575
        new_iterator = new_inv.iter_entries_by_dir()
2394
2576
        # we will be modifying the dirstate, so we need a stable iterator. In
2395
2577
        # future we might write one, for now we just clone the state into a
2396
 
        # list - which is a shallow copy.
 
2578
        # list using a copy so that we see every original item and don't have
 
2579
        # to adjust the position when items are inserted or deleted in the
 
2580
        # underlying dirstate.
2397
2581
        old_iterator = iter(list(self._iter_entries()))
2398
2582
        # both must have roots so this is safe:
2399
2583
        current_new = new_iterator.next()
2433
2617
            # we make both end conditions explicit
2434
2618
            if not current_old:
2435
2619
                # old is finished: insert current_new into the state.
 
2620
                if tracing:
 
2621
                    trace.mutter("Appending from new '%s'.",
 
2622
                        new_path_utf8.decode('utf8'))
2436
2623
                self.update_minimal(new_entry_key, current_new_minikind,
2437
2624
                    executable=current_new[1].executable,
2438
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2625
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2626
                    fullscan=True)
2439
2627
                current_new = advance(new_iterator)
2440
2628
            elif not current_new:
2441
2629
                # new is finished
 
2630
                if tracing:
 
2631
                    trace.mutter("Truncating from old '%s/%s'.",
 
2632
                        current_old[0][0].decode('utf8'),
 
2633
                        current_old[0][1].decode('utf8'))
2442
2634
                self._make_absent(current_old)
2443
2635
                current_old = advance(old_iterator)
2444
2636
            elif new_entry_key == current_old[0]:
2451
2643
                # kind has changed.
2452
2644
                if (current_old[1][0][3] != current_new[1].executable or
2453
2645
                    current_old[1][0][0] != current_new_minikind):
 
2646
                    if tracing:
 
2647
                        trace.mutter("Updating in-place change '%s'.",
 
2648
                            new_path_utf8.decode('utf8'))
2454
2649
                    self.update_minimal(current_old[0], current_new_minikind,
2455
2650
                        executable=current_new[1].executable,
2456
 
                        path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2651
                        path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2652
                        fullscan=True)
2457
2653
                # both sides are dealt with, move on
2458
2654
                current_old = advance(old_iterator)
2459
2655
                current_new = advance(new_iterator)
2462
2658
                      and new_entry_key[1:] < current_old[0][1:])):
2463
2659
                # new comes before:
2464
2660
                # add an entry for this and advance new
 
2661
                if tracing:
 
2662
                    trace.mutter("Inserting from new '%s'.",
 
2663
                        new_path_utf8.decode('utf8'))
2465
2664
                self.update_minimal(new_entry_key, current_new_minikind,
2466
2665
                    executable=current_new[1].executable,
2467
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2666
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2667
                    fullscan=True)
2468
2668
                current_new = advance(new_iterator)
2469
2669
            else:
2470
2670
                # we've advanced past the place where the old key would be,
2471
2671
                # without seeing it in the new list.  so it must be gone.
 
2672
                if tracing:
 
2673
                    trace.mutter("Deleting from old '%s/%s'.",
 
2674
                        current_old[0][0].decode('utf8'),
 
2675
                        current_old[0][1].decode('utf8'))
2472
2676
                self._make_absent(current_old)
2473
2677
                current_old = advance(old_iterator)
2474
2678
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2475
2679
        self._id_index = None
2476
2680
        self._packed_stat_index = None
 
2681
        if tracing:
 
2682
            trace.mutter("set_state_from_inventory complete.")
2477
2683
 
2478
2684
    def _make_absent(self, current_old):
2479
2685
        """Mark current_old - an entry - as absent for tree 0.
2505
2711
            block[1].pop(entry_index)
2506
2712
            # if we have an id_index in use, remove this key from it for this id.
2507
2713
            if self._id_index is not None:
2508
 
                self._id_index[current_old[0][2]].remove(current_old[0])
 
2714
                self._remove_from_id_index(self._id_index, current_old[0])
2509
2715
        # update all remaining keys for this id to record it as absent. The
2510
2716
        # existing details may either be the record we are marking as deleted
2511
2717
        # (if there were other trees with the id present at this path), or may
2528
2734
        return last_reference
2529
2735
 
2530
2736
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
2531
 
                       packed_stat=None, size=0, path_utf8=None):
 
2737
        packed_stat=None, size=0, path_utf8=None, fullscan=False):
2532
2738
        """Update an entry to the state in tree 0.
2533
2739
 
2534
2740
        This will either create a new entry at 'key' or update an existing one.
2545
2751
        :param size: Size information for new entry
2546
2752
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
2547
2753
                extra computation.
 
2754
        :param fullscan: If True then a complete scan of the dirstate is being
 
2755
            done and checking for duplicate rows should not be done. This
 
2756
            should only be set by set_state_from_inventory and similar methods.
2548
2757
 
2549
2758
        If packed_stat and fingerprint are not given, they're invalidated in
2550
2759
        the entry.
2559
2768
        new_details = (minikind, fingerprint, size, executable, packed_stat)
2560
2769
        id_index = self._get_id_index()
2561
2770
        if not present:
 
2771
            # New record. Check there isn't an entry at this path already.
 
2772
            if not fullscan:
 
2773
                low_index, _ = self._find_entry_index(key[0:2] + ('',), block)
 
2774
                while low_index < len(block):
 
2775
                    entry = block[low_index]
 
2776
                    if entry[0][0:2] == key[0:2]:
 
2777
                        if entry[1][0][0] not in 'ar':
 
2778
                            # This entry has the same path (but a different id) as
 
2779
                            # the new entry we're adding, and is present in this
 
2780
                            # tree.
 
2781
                            raise errors.InconsistentDelta(
 
2782
                                ("%s/%s" % key[0:2]).decode('utf8'), key[2],
 
2783
                                "Attempt to add item at path already occupied by "
 
2784
                                "id %r" % entry[0][2])
 
2785
                        low_index += 1
 
2786
                    else:
 
2787
                        break
2562
2788
            # new entry, synthesis cross reference here,
2563
 
            existing_keys = id_index.setdefault(key[2], set())
 
2789
            existing_keys = id_index.get(key[2], ())
2564
2790
            if not existing_keys:
2565
2791
                # not currently in the state, simplest case
2566
2792
                new_entry = key, [new_details] + self._empty_parent_info()
2569
2795
                # grab one of them and use it to generate parent
2570
2796
                # relocation/absent entries.
2571
2797
                new_entry = key, [new_details]
2572
 
                for other_key in existing_keys:
 
2798
                # existing_keys can be changed as we iterate.
 
2799
                for other_key in tuple(existing_keys):
2573
2800
                    # change the record at other to be a pointer to this new
2574
2801
                    # record. The loop looks similar to the change to
2575
2802
                    # relocations when updating an existing record but its not:
2576
2803
                    # the test for existing kinds is different: this can be
2577
2804
                    # factored out to a helper though.
2578
 
                    other_block_index, present = self._find_block_index_from_key(other_key)
2579
 
                    if not present:
2580
 
                        raise AssertionError('could not find block for %s' % (other_key,))
2581
 
                    other_entry_index, present = self._find_entry_index(other_key,
2582
 
                                            self._dirblocks[other_block_index][1])
2583
 
                    if not present:
2584
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2805
                    other_block_index, present = self._find_block_index_from_key(
 
2806
                        other_key)
 
2807
                    if not present:
 
2808
                        raise AssertionError('could not find block for %s' % (
 
2809
                            other_key,))
 
2810
                    other_block = self._dirblocks[other_block_index][1]
 
2811
                    other_entry_index, present = self._find_entry_index(
 
2812
                        other_key, other_block)
 
2813
                    if not present:
 
2814
                        raise AssertionError(
 
2815
                            'update_minimal: could not find other entry for %s'
 
2816
                            % (other_key,))
2585
2817
                    if path_utf8 is None:
2586
2818
                        raise AssertionError('no path')
2587
 
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
2588
 
                        ('r', path_utf8, 0, False, '')
 
2819
                    # Turn this other location into a reference to the new
 
2820
                    # location. This also updates the aliased iterator
 
2821
                    # (current_old in set_state_from_inventory) so that the old
 
2822
                    # entry, if not already examined, is skipped over by that
 
2823
                    # loop.
 
2824
                    other_entry = other_block[other_entry_index]
 
2825
                    other_entry[1][0] = ('r', path_utf8, 0, False, '')
 
2826
                    if self._maybe_remove_row(other_block, other_entry_index,
 
2827
                                              id_index):
 
2828
                        # If the row holding this was removed, we need to
 
2829
                        # recompute where this entry goes
 
2830
                        entry_index, _ = self._find_entry_index(key, block)
2589
2831
 
 
2832
                # This loop:
 
2833
                # adds a tuple to the new details for each column
 
2834
                #  - either by copying an existing relocation pointer inside that column
 
2835
                #  - or by creating a new pointer to the right row inside that column
2590
2836
                num_present_parents = self._num_present_parents()
 
2837
                if num_present_parents:
 
2838
                    # TODO: This re-evaluates the existing_keys set, do we need
 
2839
                    #       to do that ourselves?
 
2840
                    other_key = list(existing_keys)[0]
2591
2841
                for lookup_index in xrange(1, num_present_parents + 1):
2592
2842
                    # grab any one entry, use it to find the right path.
2593
2843
                    # TODO: optimise this to reduce memory use in highly
2600
2850
                    update_entry_index, present = \
2601
2851
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
2602
2852
                    if not present:
2603
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2853
                        raise AssertionError('update_minimal: could not find entry for %s' % (other_key,))
2604
2854
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
2605
2855
                    if update_details[0] in 'ar': # relocated, absent
2606
2856
                        # its a pointer or absent in lookup_index's tree, use
2611
2861
                        pointer_path = osutils.pathjoin(*other_key[0:2])
2612
2862
                        new_entry[1].append(('r', pointer_path, 0, False, ''))
2613
2863
            block.insert(entry_index, new_entry)
2614
 
            existing_keys.add(key)
 
2864
            self._add_to_id_index(id_index, key)
2615
2865
        else:
2616
2866
            # Does the new state matter?
2617
2867
            block[entry_index][1][0] = new_details
2626
2876
            # converted to relocated.
2627
2877
            if path_utf8 is None:
2628
2878
                raise AssertionError('no path')
2629
 
            for entry_key in id_index.setdefault(key[2], set()):
 
2879
            existing_keys = id_index.get(key[2], ())
 
2880
            if key not in existing_keys:
 
2881
                raise AssertionError('We found the entry in the blocks, but'
 
2882
                    ' the key is not in the id_index.'
 
2883
                    ' key: %s, existing_keys: %s' % (key, existing_keys))
 
2884
            for entry_key in existing_keys:
2630
2885
                # TODO:PROFILING: It might be faster to just update
2631
2886
                # rather than checking if we need to, and then overwrite
2632
2887
                # the one we are located at.
2652
2907
 
2653
2908
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2654
2909
 
 
2910
    def _maybe_remove_row(self, block, index, id_index):
 
2911
        """Remove index if it is absent or relocated across the row.
 
2912
        
 
2913
        id_index is updated accordingly.
 
2914
        :return: True if we removed the row, False otherwise
 
2915
        """
 
2916
        present_in_row = False
 
2917
        entry = block[index]
 
2918
        for column in entry[1]:
 
2919
            if column[0] not in 'ar':
 
2920
                present_in_row = True
 
2921
                break
 
2922
        if not present_in_row:
 
2923
            block.pop(index)
 
2924
            self._remove_from_id_index(id_index, entry[0])
 
2925
            return True
 
2926
        return False
 
2927
 
2655
2928
    def _validate(self):
2656
2929
        """Check that invariants on the dirblock are correct.
2657
2930
 
2791
3064
            if absent_positions == tree_count:
2792
3065
                raise AssertionError(
2793
3066
                    "entry %r has no data for any tree." % (entry,))
 
3067
        if self._id_index is not None:
 
3068
            for file_id, entry_keys in self._id_index.iteritems():
 
3069
                for entry_key in entry_keys:
 
3070
                    if entry_key[2] != file_id:
 
3071
                        raise AssertionError(
 
3072
                            'file_id %r did not match entry key %s'
 
3073
                            % (file_id, entry_key))
 
3074
                if len(entry_keys) != len(set(entry_keys)):
 
3075
                    raise AssertionError(
 
3076
                        'id_index contained non-unique data for %s'
 
3077
                        % (entry_keys,))
2794
3078
 
2795
3079
    def _wipe_state(self):
2796
3080
        """Forget all state information about the dirstate."""
2938
3222
                           False, DirState.NULLSTAT)
2939
3223
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
2940
3224
    return link_or_sha1
2941
 
update_entry = py_update_entry
2942
3225
 
2943
3226
 
2944
3227
class ProcessEntryPython(object):
2945
3228
 
2946
 
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
 
3229
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
2947
3230
        "last_source_parent", "last_target_parent", "include_unchanged",
2948
 
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
2949
 
        "search_specific_files", "state", "source_index", "target_index",
2950
 
        "want_unversioned", "tree"]
 
3231
        "partial", "use_filesystem_for_exec", "utf8_decode",
 
3232
        "searched_specific_files", "search_specific_files",
 
3233
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
 
3234
        "state", "source_index", "target_index", "want_unversioned", "tree"]
2951
3235
 
2952
3236
    def __init__(self, include_unchanged, use_filesystem_for_exec,
2953
3237
        search_specific_files, state, source_index, target_index,
2954
3238
        want_unversioned, tree):
2955
3239
        self.old_dirname_to_file_id = {}
2956
3240
        self.new_dirname_to_file_id = {}
2957
 
        # Just a sentry, so that _process_entry can say that this
2958
 
        # record is handled, but isn't interesting to process (unchanged)
2959
 
        self.uninteresting = object()
 
3241
        # Are we doing a partial iter_changes?
 
3242
        self.partial = search_specific_files != set([''])
2960
3243
        # Using a list so that we can access the values and change them in
2961
3244
        # nested scope. Each one is [path, file_id, entry]
2962
3245
        self.last_source_parent = [None, None]
2965
3248
        self.use_filesystem_for_exec = use_filesystem_for_exec
2966
3249
        self.utf8_decode = cache_utf8._utf8_decode
2967
3250
        # for all search_indexs in each path at or under each element of
2968
 
        # search_specific_files, if the detail is relocated: add the id, and add the
2969
 
        # relocated path as one to search if its not searched already. If the
2970
 
        # detail is not relocated, add the id.
 
3251
        # search_specific_files, if the detail is relocated: add the id, and
 
3252
        # add the relocated path as one to search if its not searched already.
 
3253
        # If the detail is not relocated, add the id.
2971
3254
        self.searched_specific_files = set()
 
3255
        # When we search exact paths without expanding downwards, we record
 
3256
        # that here.
 
3257
        self.searched_exact_paths = set()
2972
3258
        self.search_specific_files = search_specific_files
 
3259
        # The parents up to the root of the paths we are searching.
 
3260
        # After all normal paths are returned, these specific items are returned.
 
3261
        self.search_specific_file_parents = set()
 
3262
        # The ids we've sent out in the delta.
 
3263
        self.seen_ids = set()
2973
3264
        self.state = state
2974
3265
        self.source_index = source_index
2975
3266
        self.target_index = target_index
 
3267
        if target_index != 0:
 
3268
            # A lot of code in here depends on target_index == 0
 
3269
            raise errors.BzrError('unsupported target index')
2976
3270
        self.want_unversioned = want_unversioned
2977
3271
        self.tree = tree
2978
3272
 
2980
3274
        """Compare an entry and real disk to generate delta information.
2981
3275
 
2982
3276
        :param path_info: top_relpath, basename, kind, lstat, abspath for
2983
 
            the path of entry. If None, then the path is considered absent.
2984
 
            (Perhaps we should pass in a concrete entry for this ?)
 
3277
            the path of entry. If None, then the path is considered absent in 
 
3278
            the target (Perhaps we should pass in a concrete entry for this ?)
2985
3279
            Basename is returned as a utf8 string because we expect this
2986
3280
            tuple will be ignored, and don't want to take the time to
2987
3281
            decode.
2988
 
        :return: None if these don't match
2989
 
                 A tuple of information about the change, or
2990
 
                 the object 'uninteresting' if these match, but are
2991
 
                 basically identical.
 
3282
        :return: (iter_changes_result, changed). If the entry has not been
 
3283
            handled then changed is None. Otherwise it is False if no content
 
3284
            or metadata changes have occurred, and True if any content or
 
3285
            metadata change has occurred. If self.include_unchanged is True then
 
3286
            if changed is not None, iter_changes_result will always be a result
 
3287
            tuple. Otherwise, iter_changes_result is None unless changed is
 
3288
            True.
2992
3289
        """
2993
3290
        if self.source_index is None:
2994
3291
            source_details = DirState.NULL_PARENT_DETAILS
3093
3390
                        content_change = False
3094
3391
                    target_exec = False
3095
3392
                else:
3096
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
3393
                    if path is None:
 
3394
                        path = pathjoin(old_dirname, old_basename)
 
3395
                    raise errors.BadFileKindError(path, path_info[2])
3097
3396
            if source_minikind == 'd':
3098
3397
                if path is None:
3099
3398
                    old_path = path = pathjoin(old_dirname, old_basename)
3100
3399
                self.old_dirname_to_file_id[old_path] = file_id
3101
3400
            # parent id is the entry for the path in the target tree
3102
 
            if old_dirname == self.last_source_parent[0]:
 
3401
            if old_basename and old_dirname == self.last_source_parent[0]:
3103
3402
                source_parent_id = self.last_source_parent[1]
3104
3403
            else:
3105
3404
                try:
3115
3414
                    self.last_source_parent[0] = old_dirname
3116
3415
                    self.last_source_parent[1] = source_parent_id
3117
3416
            new_dirname = entry[0][0]
3118
 
            if new_dirname == self.last_target_parent[0]:
 
3417
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
3119
3418
                target_parent_id = self.last_target_parent[1]
3120
3419
            else:
3121
3420
                try:
3138
3437
                    self.last_target_parent[1] = target_parent_id
3139
3438
 
3140
3439
            source_exec = source_details[3]
3141
 
            if (self.include_unchanged
3142
 
                or content_change
 
3440
            changed = (content_change
3143
3441
                or source_parent_id != target_parent_id
3144
3442
                or old_basename != entry[0][1]
3145
3443
                or source_exec != target_exec
3146
 
                ):
 
3444
                )
 
3445
            if not changed and not self.include_unchanged:
 
3446
                return None, False
 
3447
            else:
3147
3448
                if old_path is None:
3148
3449
                    old_path = path = pathjoin(old_dirname, old_basename)
3149
3450
                    old_path_u = self.utf8_decode(old_path)[0]
3162
3463
                       (source_parent_id, target_parent_id),
3163
3464
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
3164
3465
                       (source_kind, target_kind),
3165
 
                       (source_exec, target_exec))
3166
 
            else:
3167
 
                return self.uninteresting
 
3466
                       (source_exec, target_exec)), changed
3168
3467
        elif source_minikind in 'a' and target_minikind in 'fdlt':
3169
3468
            # looks like a new file
3170
3469
            path = pathjoin(entry[0][0], entry[0][1])
3191
3490
                       (None, parent_id),
3192
3491
                       (None, self.utf8_decode(entry[0][1])[0]),
3193
3492
                       (None, path_info[2]),
3194
 
                       (None, target_exec))
 
3493
                       (None, target_exec)), True
3195
3494
            else:
3196
3495
                # Its a missing file, report it as such.
3197
3496
                return (entry[0][2],
3201
3500
                       (None, parent_id),
3202
3501
                       (None, self.utf8_decode(entry[0][1])[0]),
3203
3502
                       (None, None),
3204
 
                       (None, False))
 
3503
                       (None, False)), True
3205
3504
        elif source_minikind in 'fdlt' and target_minikind in 'a':
3206
3505
            # unversioned, possibly, or possibly not deleted: we dont care.
3207
3506
            # if its still on disk, *and* theres no other entry at this
3219
3518
                   (parent_id, None),
3220
3519
                   (self.utf8_decode(entry[0][1])[0], None),
3221
3520
                   (DirState._minikind_to_kind[source_minikind], None),
3222
 
                   (source_details[3], None))
 
3521
                   (source_details[3], None)), True
3223
3522
        elif source_minikind in 'fdlt' and target_minikind in 'r':
3224
3523
            # a rename; could be a true rename, or a rename inherited from
3225
3524
            # a renamed parent. TODO: handle this efficiently. Its not
3237
3536
                "source_minikind=%r, target_minikind=%r"
3238
3537
                % (source_minikind, target_minikind))
3239
3538
            ## import pdb;pdb.set_trace()
3240
 
        return None
 
3539
        return None, None
3241
3540
 
3242
3541
    def __iter__(self):
        """Return this object itself as the iterator."""
3243
3542
        return self
3244
3543
 
 
3544
    def _gather_result_for_consistency(self, result):
 
3545
        """Check a result we will yield to make sure we are consistent later.
 
3546
        
 
3547
        This gathers result's parents into a set to output later.
 
3548
 
 
3549
        :param result: A result tuple.
 
3550
        """
 
3551
        if not self.partial or not result[0]:
 
3552
            return
 
3553
        self.seen_ids.add(result[0])
 
3554
        new_path = result[1][1]
 
3555
        if new_path:
 
3556
            # Not the root and not a delete: queue up the parents of the path.
 
3557
            self.search_specific_file_parents.update(
 
3558
                osutils.parent_directories(new_path.encode('utf8')))
 
3559
            # Add the root directory which parent_directories does not
 
3560
            # provide.
 
3561
            self.search_specific_file_parents.add('')
 
3562
 
3245
3563
    def iter_changes(self):
3246
3564
        """Iterate over the changes."""
3247
3565
        utf8_decode = cache_utf8._utf8_decode
3248
3566
        _cmp_by_dirs = cmp_by_dirs
3249
3567
        _process_entry = self._process_entry
3250
 
        uninteresting = self.uninteresting
3251
3568
        search_specific_files = self.search_specific_files
3252
3569
        searched_specific_files = self.searched_specific_files
3253
3570
        splitpath = osutils.splitpath
3323
3640
                continue
3324
3641
            path_handled = False
3325
3642
            for entry in root_entries:
3326
 
                result = _process_entry(entry, root_dir_info)
3327
 
                if result is not None:
 
3643
                result, changed = _process_entry(entry, root_dir_info)
 
3644
                if changed is not None:
3328
3645
                    path_handled = True
3329
 
                    if result is not uninteresting:
 
3646
                    if changed:
 
3647
                        self._gather_result_for_consistency(result)
 
3648
                    if changed or self.include_unchanged:
3330
3649
                        yield result
3331
3650
            if self.want_unversioned and not path_handled and root_dir_info:
3332
3651
                new_executable = bool(
3442
3761
                        for current_entry in current_block[1]:
3443
3762
                            # entry referring to file not present on disk.
3444
3763
                            # advance the entry only, after processing.
3445
 
                            result = _process_entry(current_entry, None)
3446
 
                            if result is not None:
3447
 
                                if result is not uninteresting:
 
3764
                            result, changed = _process_entry(current_entry, None)
 
3765
                            if changed is not None:
 
3766
                                if changed:
 
3767
                                    self._gather_result_for_consistency(result)
 
3768
                                if changed or self.include_unchanged:
3448
3769
                                    yield result
3449
3770
                        block_index +=1
3450
3771
                        if (block_index < len(self.state._dirblocks) and
3480
3801
                        pass
3481
3802
                    elif current_path_info is None:
3482
3803
                        # no path is fine: the per entry code will handle it.
3483
 
                        result = _process_entry(current_entry, current_path_info)
3484
 
                        if result is not None:
3485
 
                            if result is not uninteresting:
 
3804
                        result, changed = _process_entry(current_entry, current_path_info)
 
3805
                        if changed is not None:
 
3806
                            if changed:
 
3807
                                self._gather_result_for_consistency(result)
 
3808
                            if changed or self.include_unchanged:
3486
3809
                                yield result
3487
3810
                    elif (current_entry[0][1] != current_path_info[1]
3488
3811
                          or current_entry[1][self.target_index][0] in 'ar'):
3501
3824
                        else:
3502
3825
                            # entry referring to file not present on disk.
3503
3826
                            # advance the entry only, after processing.
3504
 
                            result = _process_entry(current_entry, None)
3505
 
                            if result is not None:
3506
 
                                if result is not uninteresting:
 
3827
                            result, changed = _process_entry(current_entry, None)
 
3828
                            if changed is not None:
 
3829
                                if changed:
 
3830
                                    self._gather_result_for_consistency(result)
 
3831
                                if changed or self.include_unchanged:
3507
3832
                                    yield result
3508
3833
                            advance_path = False
3509
3834
                    else:
3510
 
                        result = _process_entry(current_entry, current_path_info)
3511
 
                        if result is not None:
 
3835
                        result, changed = _process_entry(current_entry, current_path_info)
 
3836
                        if changed is not None:
3512
3837
                            path_handled = True
3513
 
                            if result is not uninteresting:
 
3838
                            if changed:
 
3839
                                self._gather_result_for_consistency(result)
 
3840
                            if changed or self.include_unchanged:
3514
3841
                                yield result
3515
3842
                    if advance_entry and current_entry is not None:
3516
3843
                        entry_index += 1
3575
3902
                        current_dir_info = dir_iterator.next()
3576
3903
                    except StopIteration:
3577
3904
                        current_dir_info = None
3578
 
_process_entry = ProcessEntryPython
 
3905
        for result in self._iter_specific_file_parents():
 
3906
            yield result
 
3907
 
 
3908
    def _iter_specific_file_parents(self):
 
3909
        """Iter over the specific file parents."""
 
3910
        while self.search_specific_file_parents:
 
3911
            # Process the parent directories for the paths we were iterating.
 
3912
            # Even in extremely large trees this should be modest, so currently
 
3913
            # no attempt is made to optimise.
 
3914
            path_utf8 = self.search_specific_file_parents.pop()
 
3915
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
3916
                # We've examined this path.
 
3917
                continue
 
3918
            if path_utf8 in self.searched_exact_paths:
 
3919
                # We've examined this path.
 
3920
                continue
 
3921
            path_entries = self.state._entries_for_path(path_utf8)
 
3922
            # We need either one or two entries. If the path in
 
3923
            # self.target_index has moved (so the entry in source_index is in
 
3924
            # 'ar') then we need to also look for the entry for this path in
 
3925
            # self.source_index, to output the appropriate delete-or-rename.
 
3926
            selected_entries = []
 
3927
            found_item = False
 
3928
            for candidate_entry in path_entries:
 
3929
                # Find entries present in target at this path:
 
3930
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
3931
                    found_item = True
 
3932
                    selected_entries.append(candidate_entry)
 
3933
                # Find entries present in source at this path:
 
3934
                elif (self.source_index is not None and
 
3935
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
3936
                    found_item = True
 
3937
                    if candidate_entry[1][self.target_index][0] == 'a':
 
3938
                        # Deleted, emit it here.
 
3939
                        selected_entries.append(candidate_entry)
 
3940
                    else:
 
3941
                        # renamed, emit it when we process the directory it
 
3942
                        # ended up at.
 
3943
                        self.search_specific_file_parents.add(
 
3944
                            candidate_entry[1][self.target_index][1])
 
3945
            if not found_item:
 
3946
                raise AssertionError(
 
3947
                    "Missing entry for specific path parent %r, %r" % (
 
3948
                    path_utf8, path_entries))
 
3949
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
3950
            for entry in selected_entries:
 
3951
                if entry[0][2] in self.seen_ids:
 
3952
                    continue
 
3953
                result, changed = self._process_entry(entry, path_info)
 
3954
                if changed is None:
 
3955
                    raise AssertionError(
 
3956
                        "Got entry<->path mismatch for specific path "
 
3957
                        "%r entry %r path_info %r " % (
 
3958
                        path_utf8, entry, path_info))
 
3959
                # Only include changes - we're outside the users requested
 
3960
                # expansion.
 
3961
                if changed:
 
3962
                    self._gather_result_for_consistency(result)
 
3963
                    if (result[6][0] == 'directory' and
 
3964
                        result[6][1] != 'directory'):
 
3965
                        # This stopped being a directory, the old children have
 
3966
                        # to be included.
 
3967
                        if entry[1][self.source_index][0] == 'r':
 
3968
                            # renamed, take the source path
 
3969
                            entry_path_utf8 = entry[1][self.source_index][1]
 
3970
                        else:
 
3971
                            entry_path_utf8 = path_utf8
 
3972
                        initial_key = (entry_path_utf8, '', '')
 
3973
                        block_index, _ = self.state._find_block_index_from_key(
 
3974
                            initial_key)
 
3975
                        if block_index == 0:
 
3976
                            # The children of the root are in block index 1.
 
3977
                            block_index +=1
 
3978
                        current_block = None
 
3979
                        if block_index < len(self.state._dirblocks):
 
3980
                            current_block = self.state._dirblocks[block_index]
 
3981
                            if not osutils.is_inside(
 
3982
                                entry_path_utf8, current_block[0]):
 
3983
                                # No entries for this directory at all.
 
3984
                                current_block = None
 
3985
                        if current_block is not None:
 
3986
                            for entry in current_block[1]:
 
3987
                                if entry[1][self.source_index][0] in 'ar':
 
3988
                                    # Not in the source tree, so doesn't have to be
 
3989
                                    # included.
 
3990
                                    continue
 
3991
                                # Path of the entry itself.
 
3992
 
 
3993
                                self.search_specific_file_parents.add(
 
3994
                                    osutils.pathjoin(*entry[0][:2]))
 
3995
                if changed or self.include_unchanged:
 
3996
                    yield result
 
3997
            self.searched_exact_paths.add(path_utf8)
 
3998
 
 
3999
    def _path_info(self, utf8_path, unicode_path):
 
4000
        """Generate path_info for unicode_path.
 
4001
 
 
4002
        :return: None if unicode_path does not exist, or a path_info tuple.
 
4003
        """
 
4004
        abspath = self.tree.abspath(unicode_path)
 
4005
        try:
 
4006
            stat = os.lstat(abspath)
 
4007
        except OSError, e:
 
4008
            if e.errno == errno.ENOENT:
 
4009
                # the path does not exist.
 
4010
                return None
 
4011
            else:
 
4012
                raise
 
4013
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
4014
        dir_info = (utf8_path, utf8_basename,
 
4015
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
4016
            abspath)
 
4017
        if dir_info[2] == 'directory':
 
4018
            if self.tree._directory_is_tree_reference(
 
4019
                unicode_path):
 
4020
                self.root_dir_info = self.root_dir_info[:2] + \
 
4021
                    ('tree-reference',) + self.root_dir_info[3:]
 
4022
        return dir_info
3579
4023
 
3580
4024
 
3581
4025
# Try to load the compiled form if possible
try:
    from bzrlib._dirstate_helpers_pyx import (
        _read_dirblocks,
        bisect_dirblock,
        _bisect_path_left,
        _bisect_path_right,
        cmp_by_dirs,
        ProcessEntryC as _process_entry,
        update_entry as update_entry,
        )
except ImportError as e:
    # The compiled helpers could not be imported: hand the error to osutils
    # and fall back to the pure Python helpers.
    osutils.failed_to_load_extension(e)
    from bzrlib._dirstate_helpers_py import (
        _read_dirblocks,
        bisect_dirblock,
        _bisect_path_left,
        _bisect_path_right,
        cmp_by_dirs,
        )
    # FIXME: It would be nice to be able to track moved lines so that the
    # corresponding python code can be moved to the _dirstate_helpers_py
    # module. I don't want to break the history for this important piece of
    # code so I left the code here -- vila 20090622
    # The Python module does not supply these two names, so bind them to the
    # implementations that live in this file.
    update_entry = py_update_entry
    _process_entry = ProcessEntryPython