~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2009-06-03 15:02:09 UTC
  • mfrom: (4398.2.1 export-test-fix)
  • Revision ID: pqm@pqm.ubuntu.com-20090603150209-szap3popp2j8fpl3
(John Szakmeister) Fix error formatting for tar related KnownFailure
        on Mac

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2006-2010 Canonical Ltd
 
1
# Copyright (C) 2006, 2007, 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
204
204
import bisect
205
205
import binascii
206
206
import errno
207
 
import operator
208
207
import os
209
208
from stat import S_IEXEC
210
209
import stat
220
219
    inventory,
221
220
    lock,
222
221
    osutils,
223
 
    static_tuple,
224
222
    trace,
225
223
    )
226
224
 
549
547
           self._ensure_block(block_index, entry_index, utf8path)
550
548
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
551
549
        if self._id_index:
552
 
            self._add_to_id_index(self._id_index, entry_key)
 
550
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)
553
551
 
554
552
    def _bisect(self, paths):
555
553
        """Bisect through the disk structure for specific rows.
1279
1277
    def update_by_delta(self, delta):
1280
1278
        """Apply an inventory delta to the dirstate for tree 0
1281
1279
 
1282
 
        This is the workhorse for apply_inventory_delta in dirstate based
1283
 
        trees.
1284
 
 
1285
1280
        :param delta: An inventory delta.  See Inventory.apply_delta for
1286
1281
            details.
1287
1282
        """
1288
1283
        self._read_dirblocks_if_needed()
1289
 
        encode = cache_utf8.encode
1290
1284
        insertions = {}
1291
1285
        removals = {}
1292
 
        # Accumulate parent references (path_utf8, id), to check for parentless
1293
 
        # items or items placed under files/links/tree-references. We get
1294
 
        # references from every item in the delta that is not a deletion and
1295
 
        # is not itself the root.
1296
 
        parents = set()
1297
 
        # Added ids must not be in the dirstate already. This set holds those
1298
 
        # ids.
1299
 
        new_ids = set()
1300
 
        # This loop transforms the delta to single atomic operations that can
1301
 
        # be executed and validated.
1302
 
        for old_path, new_path, file_id, inv_entry in sorted(
1303
 
            inventory._check_delta_unique_old_paths(
1304
 
            inventory._check_delta_unique_new_paths(
1305
 
            inventory._check_delta_ids_match_entry(
1306
 
            inventory._check_delta_ids_are_valid(
1307
 
            inventory._check_delta_new_path_entry_both_or_None(delta))))),
1308
 
            reverse=True):
 
1286
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
1309
1287
            if (file_id in insertions) or (file_id in removals):
1310
 
                raise errors.InconsistentDelta(old_path or new_path, file_id,
1311
 
                    "repeated file_id")
 
1288
                raise AssertionError("repeated file id in delta %r" % (file_id,))
1312
1289
            if old_path is not None:
1313
1290
                old_path = old_path.encode('utf-8')
1314
1291
                removals[file_id] = old_path
1315
 
            else:
1316
 
                new_ids.add(file_id)
1317
1292
            if new_path is not None:
1318
 
                if inv_entry is None:
1319
 
                    raise errors.InconsistentDelta(new_path, file_id,
1320
 
                        "new_path with no entry")
1321
1293
                new_path = new_path.encode('utf-8')
1322
 
                dirname_utf8, basename = osutils.split(new_path)
1323
 
                if basename:
1324
 
                    parents.add((dirname_utf8, inv_entry.parent_id))
1325
 
                key = (dirname_utf8, basename, file_id)
 
1294
                dirname, basename = osutils.split(new_path)
 
1295
                key = (dirname, basename, file_id)
1326
1296
                minikind = DirState._kind_to_minikind[inv_entry.kind]
1327
1297
                if minikind == 't':
1328
 
                    fingerprint = inv_entry.reference_revision or ''
 
1298
                    fingerprint = inv_entry.reference_revision
1329
1299
                else:
1330
1300
                    fingerprint = ''
1331
1301
                insertions[file_id] = (key, minikind, inv_entry.executable,
1340
1310
                    minikind = child[1][0][0]
1341
1311
                    fingerprint = child[1][0][4]
1342
1312
                    executable = child[1][0][3]
1343
 
                    old_child_path = osutils.pathjoin(child_dirname,
1344
 
                                                      child_basename)
 
1313
                    old_child_path = osutils.pathjoin(child[0][0],
 
1314
                                                      child[0][1])
1345
1315
                    removals[child[0][2]] = old_child_path
1346
1316
                    child_suffix = child_dirname[len(old_path):]
1347
1317
                    new_child_dirname = (new_path + child_suffix)
1348
1318
                    key = (new_child_dirname, child_basename, child[0][2])
1349
 
                    new_child_path = osutils.pathjoin(new_child_dirname,
1350
 
                                                      child_basename)
 
1319
                    new_child_path = os.path.join(new_child_dirname,
 
1320
                                                  child_basename)
1351
1321
                    insertions[child[0][2]] = (key, minikind, executable,
1352
1322
                                               fingerprint, new_child_path)
1353
 
        self._check_delta_ids_absent(new_ids, delta, 0)
1354
 
        try:
1355
 
            self._apply_removals(removals.iteritems())
1356
 
            self._apply_insertions(insertions.values())
1357
 
            # Validate parents
1358
 
            self._after_delta_check_parents(parents, 0)
1359
 
        except errors.BzrError, e:
1360
 
            self._changes_aborted = True
1361
 
            if 'integrity error' not in str(e):
1362
 
                raise
1363
 
            # _get_entry raises BzrError when a request is inconsistent; we
1364
 
            # want such errors to be shown as InconsistentDelta - and that 
1365
 
            # fits the behaviour we trigger.
1366
 
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
 
1323
        self._apply_removals(removals.values())
 
1324
        self._apply_insertions(insertions.values())
1367
1325
 
1368
1326
    def _apply_removals(self, removals):
1369
 
        for file_id, path in sorted(removals, reverse=True,
1370
 
            key=operator.itemgetter(1)):
 
1327
        for path in sorted(removals, reverse=True):
1371
1328
            dirname, basename = osutils.split(path)
1372
1329
            block_i, entry_i, d_present, f_present = \
1373
1330
                self._get_block_entry_index(dirname, basename, 0)
1374
 
            try:
1375
 
                entry = self._dirblocks[block_i][1][entry_i]
1376
 
            except IndexError:
1377
 
                self._changes_aborted = True
1378
 
                raise errors.InconsistentDelta(path, file_id,
1379
 
                    "Wrong path for old path.")
1380
 
            if not f_present or entry[1][0][0] in 'ar':
1381
 
                self._changes_aborted = True
1382
 
                raise errors.InconsistentDelta(path, file_id,
1383
 
                    "Wrong path for old path.")
1384
 
            if file_id != entry[0][2]:
1385
 
                self._changes_aborted = True
1386
 
                raise errors.InconsistentDelta(path, file_id,
1387
 
                    "Attempt to remove path has wrong id - found %r."
1388
 
                    % entry[0][2])
 
1331
            entry = self._dirblocks[block_i][1][entry_i]
1389
1332
            self._make_absent(entry)
1390
1333
            # See if we have a malformed delta: deleting a directory must not
1391
1334
            # leave crud behind. This increases the number of bisects needed
1399
1342
                # be due to it being in a parent tree, or a corrupt delta.
1400
1343
                for child_entry in self._dirblocks[block_i][1]:
1401
1344
                    if child_entry[1][0][0] not in ('r', 'a'):
1402
 
                        self._changes_aborted = True
1403
1345
                        raise errors.InconsistentDelta(path, entry[0][2],
1404
1346
                            "The file id was deleted but its children were "
1405
1347
                            "not deleted.")
1406
1348
 
1407
1349
    def _apply_insertions(self, adds):
1408
 
        try:
1409
 
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1410
 
                self.update_minimal(key, minikind, executable, fingerprint,
1411
 
                                    path_utf8=path_utf8)
1412
 
        except errors.NotVersionedError:
1413
 
            self._changes_aborted = True
1414
 
            raise errors.InconsistentDelta(path_utf8.decode('utf8'), key[2],
1415
 
                "Missing parent")
 
1350
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
 
1351
            self.update_minimal(key, minikind, executable, fingerprint,
 
1352
                                path_utf8=path_utf8)
1416
1353
 
1417
1354
    def update_basis_by_delta(self, delta, new_revid):
1418
1355
        """Update the parents of this tree after a commit.
1462
1399
        # At the same time, to reduce interface friction we convert the input
1463
1400
        # inventory entries to dirstate.
1464
1401
        root_only = ('', '')
1465
 
        # Accumulate parent references (path_utf8, id), to check for parentless
1466
 
        # items or items placed under files/links/tree-references. We get
1467
 
        # references from every item in the delta that is not a deletion and
1468
 
        # is not itself the root.
1469
 
        parents = set()
1470
 
        # Added ids must not be in the dirstate already. This set holds those
1471
 
        # ids.
1472
 
        new_ids = set()
1473
1402
        for old_path, new_path, file_id, inv_entry in delta:
1474
 
            if inv_entry is not None and file_id != inv_entry.file_id:
1475
 
                raise errors.InconsistentDelta(new_path, file_id,
1476
 
                    "mismatched entry file_id %r" % inv_entry)
1477
 
            if new_path is not None:
1478
 
                if inv_entry is None:
1479
 
                    raise errors.InconsistentDelta(new_path, file_id,
1480
 
                        "new_path with no entry")
1481
 
                new_path_utf8 = encode(new_path)
1482
 
                # note the parent for validation
1483
 
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
1484
 
                if basename_utf8:
1485
 
                    parents.add((dirname_utf8, inv_entry.parent_id))
1486
1403
            if old_path is None:
1487
1404
                adds.append((None, encode(new_path), file_id,
1488
1405
                    inv_to_entry(inv_entry), True))
1489
 
                new_ids.add(file_id)
1490
1406
            elif new_path is None:
1491
1407
                deletes.append((encode(old_path), None, file_id, None, True))
1492
1408
            elif (old_path, new_path) != root_only:
1504
1420
                # for 'r' items on every pass.
1505
1421
                self._update_basis_apply_deletes(deletes)
1506
1422
                deletes = []
 
1423
                new_path_utf8 = encode(new_path)
1507
1424
                # Split into an add/delete pair recursively.
1508
1425
                adds.append((None, new_path_utf8, file_id,
1509
1426
                    inv_to_entry(inv_entry), False))
1535
1452
                # of everything.
1536
1453
                changes.append((encode(old_path), encode(new_path), file_id,
1537
1454
                    inv_to_entry(inv_entry)))
1538
 
        self._check_delta_ids_absent(new_ids, delta, 1)
1539
 
        try:
1540
 
            # Finish expunging deletes/first half of renames.
1541
 
            self._update_basis_apply_deletes(deletes)
1542
 
            # Reinstate second half of renames and new paths.
1543
 
            self._update_basis_apply_adds(adds)
1544
 
            # Apply in-situ changes.
1545
 
            self._update_basis_apply_changes(changes)
1546
 
            # Validate parents
1547
 
            self._after_delta_check_parents(parents, 1)
1548
 
        except errors.BzrError, e:
1549
 
            self._changes_aborted = True
1550
 
            if 'integrity error' not in str(e):
1551
 
                raise
1552
 
            # _get_entry raises BzrError when a request is inconsistent; we
1553
 
            # want such errors to be shown as InconsistentDelta - and that 
1554
 
            # fits the behaviour we trigger. Part of this is driven by dirstate
1555
 
            # only supporting deltas that turn the basis into a closer fit to
1556
 
            # the active tree.
1557
 
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
 
1455
 
 
1456
        # Finish expunging deletes/first half of renames.
 
1457
        self._update_basis_apply_deletes(deletes)
 
1458
        # Reinstate second half of renames and new paths.
 
1459
        self._update_basis_apply_adds(adds)
 
1460
        # Apply in-situ changes.
 
1461
        self._update_basis_apply_changes(changes)
1558
1462
 
1559
1463
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1560
1464
        self._header_state = DirState.IN_MEMORY_MODIFIED
1561
1465
        self._id_index = None
1562
1466
        return
1563
1467
 
1564
 
    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
1565
 
        """Check that none of the file_ids in new_ids are present in a tree."""
1566
 
        if not new_ids:
1567
 
            return
1568
 
        id_index = self._get_id_index()
1569
 
        for file_id in new_ids:
1570
 
            for key in id_index.get(file_id, ()):
1571
 
                block_i, entry_i, d_present, f_present = \
1572
 
                    self._get_block_entry_index(key[0], key[1], tree_index)
1573
 
                if not f_present:
1574
 
                    # In a different tree
1575
 
                    continue
1576
 
                entry = self._dirblocks[block_i][1][entry_i]
1577
 
                if entry[0][2] != file_id:
1578
 
                    # Different file_id, so not what we want.
1579
 
                    continue
1580
 
                # NB: No changes made before this helper is called, so no need
1581
 
                # to set the _changes_aborted flag.
1582
 
                raise errors.InconsistentDelta(
1583
 
                    ("%s/%s" % key[0:2]).decode('utf8'), file_id,
1584
 
                    "This file_id is new in the delta but already present in "
1585
 
                    "the target")
1586
 
 
1587
1468
    def _update_basis_apply_adds(self, adds):
1588
1469
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.
1589
1470
 
1654
1535
        null = DirState.NULL_PARENT_DETAILS
1655
1536
        for old_path, new_path, file_id, _, real_delete in deletes:
1656
1537
            if real_delete != (new_path is None):
1657
 
                self._changes_aborted = True
1658
1538
                raise AssertionError("bad delete delta")
1659
1539
            # the entry for this file_id must be in tree 1.
1660
1540
            dirname, basename = osutils.split(old_path)
1693
1573
                    # it is being resurrected here, so blank it out temporarily.
1694
1574
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1695
1575
 
1696
 
    def _after_delta_check_parents(self, parents, index):
1697
 
        """Check that parents required by the delta are all intact.
1698
 
        
1699
 
        :param parents: An iterable of (path_utf8, file_id) tuples which are
1700
 
            required to be present in tree 'index' at path_utf8 with id file_id
1701
 
            and be a directory.
1702
 
        :param index: The column in the dirstate to check for parents in.
1703
 
        """
1704
 
        for dirname_utf8, file_id in parents:
1705
 
            # Get the entry - this ensures that file_id, dirname_utf8 exists and
1706
 
            # has the right file id.
1707
 
            entry = self._get_entry(index, file_id, dirname_utf8)
1708
 
            if entry[1] is None:
1709
 
                self._changes_aborted = True
1710
 
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
1711
 
                    file_id, "This parent is not present.")
1712
 
            # Parents of things must be directories
1713
 
            if entry[1][index][0] != 'd':
1714
 
                self._changes_aborted = True
1715
 
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
1716
 
                    file_id, "This parent is not a directory.")
1717
 
 
1718
1576
    def _observed_sha1(self, entry, sha1, stat_value,
1719
1577
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1720
1578
        """Note the sha1 of a file.
1963
1821
        self._read_dirblocks_if_needed()
1964
1822
        if path_utf8 is not None:
1965
1823
            if type(path_utf8) is not str:
1966
 
                raise errors.BzrError('path_utf8 is not a str: %s %r'
 
1824
                raise AssertionError('path_utf8 is not a str: %s %s'
1967
1825
                    % (type(path_utf8), path_utf8))
1968
1826
            # path lookups are faster
1969
1827
            dirname, basename = osutils.split(path_utf8)
1981
1839
                                          ' tree_index, file_id and path')
1982
1840
            return entry
1983
1841
        else:
1984
 
            possible_keys = self._get_id_index().get(fileid_utf8, ())
 
1842
            possible_keys = self._get_id_index().get(fileid_utf8, None)
1985
1843
            if not possible_keys:
1986
1844
                return None, None
1987
1845
            for key in possible_keys:
1998
1856
                entry_index, present = self._find_entry_index(key, block)
1999
1857
                if present:
2000
1858
                    entry = self._dirblocks[block_index][1][entry_index]
2001
 
                    # TODO: We might want to assert that entry[0][2] ==
2002
 
                    #       fileid_utf8.
2003
1859
                    if entry[1][tree_index][0] in 'fdlt':
2004
1860
                        # this is the result we are looking for: the
2005
1861
                        # real home of this file_id in this tree.
2144
2000
                yield entry
2145
2001
 
2146
2002
    def _get_id_index(self):
2147
 
        """Get an id index of self._dirblocks.
2148
 
        
2149
 
        This maps from file_id => [(directory, name, file_id)] entries where
2150
 
        that file_id appears in one of the trees.
2151
 
        """
 
2003
        """Get an id index of self._dirblocks."""
2152
2004
        if self._id_index is None:
2153
2005
            id_index = {}
2154
2006
            for key, tree_details in self._iter_entries():
2155
 
                self._add_to_id_index(id_index, key)
 
2007
                id_index.setdefault(key[2], set()).add(key)
2156
2008
            self._id_index = id_index
2157
2009
        return self._id_index
2158
2010
 
2159
 
    def _add_to_id_index(self, id_index, entry_key):
2160
 
        """Add this entry to the _id_index mapping."""
2161
 
        # This code used to use a set for every entry in the id_index. However,
2162
 
        # it is *rare* to have more than one entry. So a set is a large
2163
 
        # overkill. And even when we do, we won't ever have more than the
2164
 
        # number of parent trees. Which is still a small number (rarely >2). As
2165
 
        # such, we use a simple tuple, and do our own uniqueness checks. While
2166
 
        # the 'in' check is O(N) since N is nicely bounded it shouldn't ever
2167
 
        # cause quadratic failure.
2168
 
        # TODO: This should use StaticTuple
2169
 
        file_id = entry_key[2]
2170
 
        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
2171
 
        if file_id not in id_index:
2172
 
            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
2173
 
        else:
2174
 
            entry_keys = id_index[file_id]
2175
 
            if entry_key not in entry_keys:
2176
 
                id_index[file_id] = entry_keys + (entry_key,)
2177
 
 
2178
 
    def _remove_from_id_index(self, id_index, entry_key):
2179
 
        """Remove this entry from the _id_index mapping.
2180
 
 
2181
 
        It is a programming error to call this when the entry_key is not
2182
 
        already present.
2183
 
        """
2184
 
        file_id = entry_key[2]
2185
 
        entry_keys = list(id_index[file_id])
2186
 
        entry_keys.remove(entry_key)
2187
 
        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
2188
 
 
2189
2011
    def _get_output_lines(self, lines):
2190
2012
        """Format lines for final output.
2191
2013
 
2391
2213
        self.update_minimal(('', '', new_id), 'd',
2392
2214
            path_utf8='', packed_stat=entry[1][0][4])
2393
2215
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2216
        if self._id_index is not None:
 
2217
            self._id_index.setdefault(new_id, set()).add(entry[0])
2394
2218
 
2395
2219
    def set_parent_trees(self, trees, ghosts):
2396
2220
        """Set the parent trees for the dirstate.
2449
2273
                continue
2450
2274
            by_path[entry[0]] = [entry[1][0]] + \
2451
2275
                [DirState.NULL_PARENT_DETAILS] * parent_count
2452
 
            # TODO: Possibly inline this, since we know it isn't present yet
2453
 
            #       id_index[entry[0][2]] = (entry[0],)
2454
 
            self._add_to_id_index(id_index, entry[0])
 
2276
            id_index[entry[0][2]] = set([entry[0]])
2455
2277
 
2456
2278
        # now the parent trees:
2457
2279
        for tree_index, tree in enumerate(parent_trees):
2479
2301
                new_entry_key = (dirname, basename, file_id)
2480
2302
                # tree index consistency: All other paths for this id in this tree
2481
2303
                # index must point to the correct path.
2482
 
                for entry_key in id_index.get(file_id, ()):
 
2304
                for entry_key in id_index.setdefault(file_id, set()):
2483
2305
                    # TODO:PROFILING: It might be faster to just update
2484
2306
                    # rather than checking if we need to, and then overwrite
2485
2307
                    # the one we are located at.
2491
2313
                        by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2492
2314
                # by path consistency: Insert into an existing path record (trivial), or
2493
2315
                # add a new one with relocation pointers for the other tree indexes.
2494
 
                entry_keys = id_index.get(file_id, ())
2495
 
                if new_entry_key in entry_keys:
 
2316
                if new_entry_key in id_index[file_id]:
2496
2317
                    # there is already an entry where this data belongs, just insert it.
2497
2318
                    by_path[new_entry_key][tree_index] = \
2498
2319
                        self._inv_entry_to_details(entry)
2503
2324
                    new_details = []
2504
2325
                    for lookup_index in xrange(tree_index):
2505
2326
                        # boundary case: this is the first occurence of file_id
2506
 
                        # so there are no id_indexes, possibly take this out of
 
2327
                        # so there are no id_indexs, possibly take this out of
2507
2328
                        # the loop?
2508
 
                        if not len(entry_keys):
 
2329
                        if not len(id_index[file_id]):
2509
2330
                            new_details.append(DirState.NULL_PARENT_DETAILS)
2510
2331
                        else:
2511
2332
                            # grab any one entry, use it to find the right path.
2512
2333
                            # TODO: optimise this to reduce memory use in highly
2513
2334
                            # fragmented situations by reusing the relocation
2514
2335
                            # records.
2515
 
                            a_key = iter(entry_keys).next()
 
2336
                            a_key = iter(id_index[file_id]).next()
2516
2337
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
2517
2338
                                # its a pointer or missing statement, use it as is.
2518
2339
                                new_details.append(by_path[a_key][lookup_index])
2523
2344
                    new_details.append(self._inv_entry_to_details(entry))
2524
2345
                    new_details.extend(new_location_suffix)
2525
2346
                    by_path[new_entry_key] = new_details
2526
 
                    self._add_to_id_index(id_index, new_entry_key)
 
2347
                    id_index[file_id].add(new_entry_key)
2527
2348
        # --- end generation of full tree mappings
2528
2349
 
2529
2350
        # sort and output all the entries
2558
2379
        if 'evil' in debug.debug_flags:
2559
2380
            trace.mutter_callsite(1,
2560
2381
                "set_state_from_inventory called; please mutate the tree instead")
2561
 
        tracing = 'dirstate' in debug.debug_flags
2562
 
        if tracing:
2563
 
            trace.mutter("set_state_from_inventory trace:")
2564
2382
        self._read_dirblocks_if_needed()
2565
2383
        # sketch:
2566
2384
        # Two iterators: current data and new data, both in dirblock order.
2575
2393
        new_iterator = new_inv.iter_entries_by_dir()
2576
2394
        # we will be modifying the dirstate, so we need a stable iterator. In
2577
2395
        # future we might write one, for now we just clone the state into a
2578
 
        # list using a copy so that we see every original item and don't have
2579
 
        # to adjust the position when items are inserted or deleted in the
2580
 
        # underlying dirstate.
 
2396
        # list - which is a shallow copy.
2581
2397
        old_iterator = iter(list(self._iter_entries()))
2582
2398
        # both must have roots so this is safe:
2583
2399
        current_new = new_iterator.next()
2617
2433
            # we make both end conditions explicit
2618
2434
            if not current_old:
2619
2435
                # old is finished: insert current_new into the state.
2620
 
                if tracing:
2621
 
                    trace.mutter("Appending from new '%s'.",
2622
 
                        new_path_utf8.decode('utf8'))
2623
2436
                self.update_minimal(new_entry_key, current_new_minikind,
2624
2437
                    executable=current_new[1].executable,
2625
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
2626
 
                    fullscan=True)
 
2438
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
2627
2439
                current_new = advance(new_iterator)
2628
2440
            elif not current_new:
2629
2441
                # new is finished
2630
 
                if tracing:
2631
 
                    trace.mutter("Truncating from old '%s/%s'.",
2632
 
                        current_old[0][0].decode('utf8'),
2633
 
                        current_old[0][1].decode('utf8'))
2634
2442
                self._make_absent(current_old)
2635
2443
                current_old = advance(old_iterator)
2636
2444
            elif new_entry_key == current_old[0]:
2643
2451
                # kind has changed.
2644
2452
                if (current_old[1][0][3] != current_new[1].executable or
2645
2453
                    current_old[1][0][0] != current_new_minikind):
2646
 
                    if tracing:
2647
 
                        trace.mutter("Updating in-place change '%s'.",
2648
 
                            new_path_utf8.decode('utf8'))
2649
2454
                    self.update_minimal(current_old[0], current_new_minikind,
2650
2455
                        executable=current_new[1].executable,
2651
 
                        path_utf8=new_path_utf8, fingerprint=fingerprint,
2652
 
                        fullscan=True)
 
2456
                        path_utf8=new_path_utf8, fingerprint=fingerprint)
2653
2457
                # both sides are dealt with, move on
2654
2458
                current_old = advance(old_iterator)
2655
2459
                current_new = advance(new_iterator)
2658
2462
                      and new_entry_key[1:] < current_old[0][1:])):
2659
2463
                # new comes before:
2660
2464
                # add a entry for this and advance new
2661
 
                if tracing:
2662
 
                    trace.mutter("Inserting from new '%s'.",
2663
 
                        new_path_utf8.decode('utf8'))
2664
2465
                self.update_minimal(new_entry_key, current_new_minikind,
2665
2466
                    executable=current_new[1].executable,
2666
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
2667
 
                    fullscan=True)
 
2467
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
2668
2468
                current_new = advance(new_iterator)
2669
2469
            else:
2670
2470
                # we've advanced past the place where the old key would be,
2671
2471
                # without seeing it in the new list.  so it must be gone.
2672
 
                if tracing:
2673
 
                    trace.mutter("Deleting from old '%s/%s'.",
2674
 
                        current_old[0][0].decode('utf8'),
2675
 
                        current_old[0][1].decode('utf8'))
2676
2472
                self._make_absent(current_old)
2677
2473
                current_old = advance(old_iterator)
2678
2474
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2679
2475
        self._id_index = None
2680
2476
        self._packed_stat_index = None
2681
 
        if tracing:
2682
 
            trace.mutter("set_state_from_inventory complete.")
2683
2477
 
2684
2478
    def _make_absent(self, current_old):
2685
2479
        """Mark current_old - an entry - as absent for tree 0.
2711
2505
            block[1].pop(entry_index)
2712
2506
            # if we have an id_index in use, remove this key from it for this id.
2713
2507
            if self._id_index is not None:
2714
 
                self._remove_from_id_index(self._id_index, current_old[0])
 
2508
                self._id_index[current_old[0][2]].remove(current_old[0])
2715
2509
        # update all remaining keys for this id to record it as absent. The
2716
2510
        # existing details may either be the record we are marking as deleted
2717
2511
        # (if there were other trees with the id present at this path), or may
2734
2528
        return last_reference
2735
2529
 
2736
2530
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
2737
 
        packed_stat=None, size=0, path_utf8=None, fullscan=False):
 
2531
                       packed_stat=None, size=0, path_utf8=None):
2738
2532
        """Update an entry to the state in tree 0.
2739
2533
 
2740
2534
        This will either create a new entry at 'key' or update an existing one.
2751
2545
        :param size: Size information for new entry
2752
2546
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
2753
2547
                extra computation.
2754
 
        :param fullscan: If True then a complete scan of the dirstate is being
2755
 
            done and checking for duplicate rows should not be done. This
2756
 
            should only be set by set_state_from_inventory and similar methods.
2757
2548
 
2758
2549
        If packed_stat and fingerprint are not given, they're invalidated in
2759
2550
        the entry.
2768
2559
        new_details = (minikind, fingerprint, size, executable, packed_stat)
2769
2560
        id_index = self._get_id_index()
2770
2561
        if not present:
2771
 
            # New record. Check there isn't a entry at this path already.
2772
 
            if not fullscan:
2773
 
                low_index, _ = self._find_entry_index(key[0:2] + ('',), block)
2774
 
                while low_index < len(block):
2775
 
                    entry = block[low_index]
2776
 
                    if entry[0][0:2] == key[0:2]:
2777
 
                        if entry[1][0][0] not in 'ar':
2778
 
                            # This entry has the same path (but a different id) as
2779
 
                            # the new entry we're adding, and is present in ths
2780
 
                            # tree.
2781
 
                            raise errors.InconsistentDelta(
2782
 
                                ("%s/%s" % key[0:2]).decode('utf8'), key[2],
2783
 
                                "Attempt to add item at path already occupied by "
2784
 
                                "id %r" % entry[0][2])
2785
 
                        low_index += 1
2786
 
                    else:
2787
 
                        break
2788
2562
            # new entry, synthesis cross reference here,
2789
 
            existing_keys = id_index.get(key[2], ())
 
2563
            existing_keys = id_index.setdefault(key[2], set())
2790
2564
            if not existing_keys:
2791
2565
                # not currently in the state, simplest case
2792
2566
                new_entry = key, [new_details] + self._empty_parent_info()
2795
2569
                # grab one of them and use it to generate parent
2796
2570
                # relocation/absent entries.
2797
2571
                new_entry = key, [new_details]
2798
 
                # existing_keys can be changed as we iterate.
2799
 
                for other_key in tuple(existing_keys):
 
2572
                for other_key in existing_keys:
2800
2573
                    # change the record at other to be a pointer to this new
2801
2574
                    # record. The loop looks similar to the change to
2802
2575
                    # relocations when updating an existing record but its not:
2803
2576
                    # the test for existing kinds is different: this can be
2804
2577
                    # factored out to a helper though.
2805
 
                    other_block_index, present = self._find_block_index_from_key(
2806
 
                        other_key)
2807
 
                    if not present:
2808
 
                        raise AssertionError('could not find block for %s' % (
2809
 
                            other_key,))
2810
 
                    other_block = self._dirblocks[other_block_index][1]
2811
 
                    other_entry_index, present = self._find_entry_index(
2812
 
                        other_key, other_block)
2813
 
                    if not present:
2814
 
                        raise AssertionError(
2815
 
                            'update_minimal: could not find other entry for %s'
2816
 
                            % (other_key,))
 
2578
                    other_block_index, present = self._find_block_index_from_key(other_key)
 
2579
                    if not present:
 
2580
                        raise AssertionError('could not find block for %s' % (other_key,))
 
2581
                    other_entry_index, present = self._find_entry_index(other_key,
 
2582
                                            self._dirblocks[other_block_index][1])
 
2583
                    if not present:
 
2584
                        raise AssertionError('could not find entry for %s' % (other_key,))
2817
2585
                    if path_utf8 is None:
2818
2586
                        raise AssertionError('no path')
2819
 
                    # Turn this other location into a reference to the new
2820
 
                    # location. This also updates the aliased iterator
2821
 
                    # (current_old in set_state_from_inventory) so that the old
2822
 
                    # entry, if not already examined, is skipped over by that
2823
 
                    # loop.
2824
 
                    other_entry = other_block[other_entry_index]
2825
 
                    other_entry[1][0] = ('r', path_utf8, 0, False, '')
2826
 
                    if self._maybe_remove_row(other_block, other_entry_index,
2827
 
                                              id_index):
2828
 
                        # If the row holding this was removed, we need to
2829
 
                        # recompute where this entry goes
2830
 
                        entry_index, _ = self._find_entry_index(key, block)
 
2587
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
 
2588
                        ('r', path_utf8, 0, False, '')
2831
2589
 
2832
 
                # This loop:
2833
 
                # adds a tuple to the new details for each column
2834
 
                #  - either by copying an existing relocation pointer inside that column
2835
 
                #  - or by creating a new pointer to the right row inside that column
2836
2590
                num_present_parents = self._num_present_parents()
2837
 
                if num_present_parents:
2838
 
                    # TODO: This re-evaluates the existing_keys set, do we need
2839
 
                    #       to do that ourselves?
2840
 
                    other_key = list(existing_keys)[0]
2841
2591
                for lookup_index in xrange(1, num_present_parents + 1):
2842
2592
                    # grab any one entry, use it to find the right path.
2843
2593
                    # TODO: optimise this to reduce memory use in highly
2850
2600
                    update_entry_index, present = \
2851
2601
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
2852
2602
                    if not present:
2853
 
                        raise AssertionError('update_minimal: could not find entry for %s' % (other_key,))
 
2603
                        raise AssertionError('could not find entry for %s' % (other_key,))
2854
2604
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
2855
2605
                    if update_details[0] in 'ar': # relocated, absent
2856
2606
                        # its a pointer or absent in lookup_index's tree, use
2861
2611
                        pointer_path = osutils.pathjoin(*other_key[0:2])
2862
2612
                        new_entry[1].append(('r', pointer_path, 0, False, ''))
2863
2613
            block.insert(entry_index, new_entry)
2864
 
            self._add_to_id_index(id_index, key)
 
2614
            existing_keys.add(key)
2865
2615
        else:
2866
2616
            # Does the new state matter?
2867
2617
            block[entry_index][1][0] = new_details
2876
2626
            # converted to relocated.
2877
2627
            if path_utf8 is None:
2878
2628
                raise AssertionError('no path')
2879
 
            existing_keys = id_index.get(key[2], ())
2880
 
            if key not in existing_keys:
2881
 
                raise AssertionError('We found the entry in the blocks, but'
2882
 
                    ' the key is not in the id_index.'
2883
 
                    ' key: %s, existing_keys: %s' % (key, existing_keys))
2884
 
            for entry_key in existing_keys:
 
2629
            for entry_key in id_index.setdefault(key[2], set()):
2885
2630
                # TODO:PROFILING: It might be faster to just update
2886
2631
                # rather than checking if we need to, and then overwrite
2887
2632
                # the one we are located at.
2907
2652
 
2908
2653
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2909
2654
 
2910
 
    def _maybe_remove_row(self, block, index, id_index):
2911
 
        """Remove index if it is absent or relocated across the row.
2912
 
        
2913
 
        id_index is updated accordingly.
2914
 
        :return: True if we removed the row, False otherwise
2915
 
        """
2916
 
        present_in_row = False
2917
 
        entry = block[index]
2918
 
        for column in entry[1]:
2919
 
            if column[0] not in 'ar':
2920
 
                present_in_row = True
2921
 
                break
2922
 
        if not present_in_row:
2923
 
            block.pop(index)
2924
 
            self._remove_from_id_index(id_index, entry[0])
2925
 
            return True
2926
 
        return False
2927
 
 
2928
2655
    def _validate(self):
2929
2656
        """Check that invariants on the dirblock are correct.
2930
2657
 
3064
2791
            if absent_positions == tree_count:
3065
2792
                raise AssertionError(
3066
2793
                    "entry %r has no data for any tree." % (entry,))
3067
 
        if self._id_index is not None:
3068
 
            for file_id, entry_keys in self._id_index.iteritems():
3069
 
                for entry_key in entry_keys:
3070
 
                    if entry_key[2] != file_id:
3071
 
                        raise AssertionError(
3072
 
                            'file_id %r did not match entry key %s'
3073
 
                            % (file_id, entry_key))
3074
 
                if len(entry_keys) != len(set(entry_keys)):
3075
 
                    raise AssertionError(
3076
 
                        'id_index contained non-unique data for %s'
3077
 
                        % (entry_keys,))
3078
2794
 
3079
2795
    def _wipe_state(self):
3080
2796
        """Forget all state information about the dirstate."""
3222
2938
                           False, DirState.NULLSTAT)
3223
2939
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
3224
2940
    return link_or_sha1
 
2941
update_entry = py_update_entry
3225
2942
 
3226
2943
 
3227
2944
class ProcessEntryPython(object):
3228
2945
 
3229
 
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
 
2946
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
3230
2947
        "last_source_parent", "last_target_parent", "include_unchanged",
3231
 
        "partial", "use_filesystem_for_exec", "utf8_decode",
3232
 
        "searched_specific_files", "search_specific_files",
3233
 
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
3234
 
        "state", "source_index", "target_index", "want_unversioned", "tree"]
 
2948
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
 
2949
        "search_specific_files", "state", "source_index", "target_index",
 
2950
        "want_unversioned", "tree"]
3235
2951
 
3236
2952
    def __init__(self, include_unchanged, use_filesystem_for_exec,
3237
2953
        search_specific_files, state, source_index, target_index,
3238
2954
        want_unversioned, tree):
3239
2955
        self.old_dirname_to_file_id = {}
3240
2956
        self.new_dirname_to_file_id = {}
3241
 
        # Are we doing a partial iter_changes?
3242
 
        self.partial = search_specific_files != set([''])
 
2957
        # Just a sentry, so that _process_entry can say that this
 
2958
        # record is handled, but isn't interesting to process (unchanged)
 
2959
        self.uninteresting = object()
3243
2960
        # Using a list so that we can access the values and change them in
3244
2961
        # nested scope. Each one is [path, file_id, entry]
3245
2962
        self.last_source_parent = [None, None]
3248
2965
        self.use_filesystem_for_exec = use_filesystem_for_exec
3249
2966
        self.utf8_decode = cache_utf8._utf8_decode
3250
2967
        # for all search_indexs in each path at or under each element of
3251
 
        # search_specific_files, if the detail is relocated: add the id, and
3252
 
        # add the relocated path as one to search if its not searched already.
3253
 
        # If the detail is not relocated, add the id.
 
2968
        # search_specific_files, if the detail is relocated: add the id, and add the
 
2969
        # relocated path as one to search if its not searched already. If the
 
2970
        # detail is not relocated, add the id.
3254
2971
        self.searched_specific_files = set()
3255
 
        # When we search exact paths without expanding downwards, we record
3256
 
        # that here.
3257
 
        self.searched_exact_paths = set()
3258
2972
        self.search_specific_files = search_specific_files
3259
 
        # The parents up to the root of the paths we are searching.
3260
 
        # After all normal paths are returned, these specific items are returned.
3261
 
        self.search_specific_file_parents = set()
3262
 
        # The ids we've sent out in the delta.
3263
 
        self.seen_ids = set()
3264
2973
        self.state = state
3265
2974
        self.source_index = source_index
3266
2975
        self.target_index = target_index
3267
 
        if target_index != 0:
3268
 
            # A lot of code in here depends on target_index == 0
3269
 
            raise errors.BzrError('unsupported target index')
3270
2976
        self.want_unversioned = want_unversioned
3271
2977
        self.tree = tree
3272
2978
 
3274
2980
        """Compare an entry and real disk to generate delta information.
3275
2981
 
3276
2982
        :param path_info: top_relpath, basename, kind, lstat, abspath for
3277
 
            the path of entry. If None, then the path is considered absent in 
3278
 
            the target (Perhaps we should pass in a concrete entry for this ?)
 
2983
            the path of entry. If None, then the path is considered absent.
 
2984
            (Perhaps we should pass in a concrete entry for this ?)
3279
2985
            Basename is returned as a utf8 string because we expect this
3280
2986
            tuple will be ignored, and don't want to take the time to
3281
2987
            decode.
3282
 
        :return: (iter_changes_result, changed). If the entry has not been
3283
 
            handled then changed is None. Otherwise it is False if no content
3284
 
            or metadata changes have occurred, and True if any content or
3285
 
            metadata change has occurred. If self.include_unchanged is True then
3286
 
            if changed is not None, iter_changes_result will always be a result
3287
 
            tuple. Otherwise, iter_changes_result is None unless changed is
3288
 
            True.
 
2988
        :return: None if these don't match
 
2989
                 A tuple of information about the change, or
 
2990
                 the object 'uninteresting' if these match, but are
 
2991
                 basically identical.
3289
2992
        """
3290
2993
        if self.source_index is None:
3291
2994
            source_details = DirState.NULL_PARENT_DETAILS
3359
3062
                    if source_minikind != 'f':
3360
3063
                        content_change = True
3361
3064
                    else:
3362
 
                        # Check the sha. We can't just rely on the size as
3363
 
                        # content filtering may mean differ sizes actually
3364
 
                        # map to the same content
3365
 
                        if link_or_sha1 is None:
3366
 
                            # Stat cache miss:
3367
 
                            statvalue, link_or_sha1 = \
3368
 
                                self.state._sha1_provider.stat_and_sha1(
3369
 
                                path_info[4])
3370
 
                            self.state._observed_sha1(entry, link_or_sha1,
3371
 
                                statvalue)
3372
 
                        content_change = (link_or_sha1 != source_details[1])
 
3065
                        # If the size is the same, check the sha:
 
3066
                        if target_details[2] == source_details[2]:
 
3067
                            if link_or_sha1 is None:
 
3068
                                # Stat cache miss:
 
3069
                                statvalue, link_or_sha1 = \
 
3070
                                    self.state._sha1_provider.stat_and_sha1(
 
3071
                                    path_info[4])
 
3072
                                self.state._observed_sha1(entry, link_or_sha1,
 
3073
                                    statvalue)
 
3074
                            content_change = (link_or_sha1 != source_details[1])
 
3075
                        else:
 
3076
                            # Size changed, so must be different
 
3077
                            content_change = True
3373
3078
                    # Target details is updated at update_entry time
3374
3079
                    if self.use_filesystem_for_exec:
3375
3080
                        # We don't need S_ISREG here, because we are sure
3390
3095
                        content_change = False
3391
3096
                    target_exec = False
3392
3097
                else:
3393
 
                    if path is None:
3394
 
                        path = pathjoin(old_dirname, old_basename)
3395
 
                    raise errors.BadFileKindError(path, path_info[2])
 
3098
                    raise Exception, "unknown kind %s" % path_info[2]
3396
3099
            if source_minikind == 'd':
3397
3100
                if path is None:
3398
3101
                    old_path = path = pathjoin(old_dirname, old_basename)
3399
3102
                self.old_dirname_to_file_id[old_path] = file_id
3400
3103
            # parent id is the entry for the path in the target tree
3401
 
            if old_basename and old_dirname == self.last_source_parent[0]:
 
3104
            if old_dirname == self.last_source_parent[0]:
3402
3105
                source_parent_id = self.last_source_parent[1]
3403
3106
            else:
3404
3107
                try:
3414
3117
                    self.last_source_parent[0] = old_dirname
3415
3118
                    self.last_source_parent[1] = source_parent_id
3416
3119
            new_dirname = entry[0][0]
3417
 
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
 
3120
            if new_dirname == self.last_target_parent[0]:
3418
3121
                target_parent_id = self.last_target_parent[1]
3419
3122
            else:
3420
3123
                try:
3437
3140
                    self.last_target_parent[1] = target_parent_id
3438
3141
 
3439
3142
            source_exec = source_details[3]
3440
 
            changed = (content_change
 
3143
            if (self.include_unchanged
 
3144
                or content_change
3441
3145
                or source_parent_id != target_parent_id
3442
3146
                or old_basename != entry[0][1]
3443
3147
                or source_exec != target_exec
3444
 
                )
3445
 
            if not changed and not self.include_unchanged:
3446
 
                return None, False
3447
 
            else:
 
3148
                ):
3448
3149
                if old_path is None:
3449
3150
                    old_path = path = pathjoin(old_dirname, old_basename)
3450
3151
                    old_path_u = self.utf8_decode(old_path)[0]
3463
3164
                       (source_parent_id, target_parent_id),
3464
3165
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
3465
3166
                       (source_kind, target_kind),
3466
 
                       (source_exec, target_exec)), changed
 
3167
                       (source_exec, target_exec))
 
3168
            else:
 
3169
                return self.uninteresting
3467
3170
        elif source_minikind in 'a' and target_minikind in 'fdlt':
3468
3171
            # looks like a new file
3469
3172
            path = pathjoin(entry[0][0], entry[0][1])
3490
3193
                       (None, parent_id),
3491
3194
                       (None, self.utf8_decode(entry[0][1])[0]),
3492
3195
                       (None, path_info[2]),
3493
 
                       (None, target_exec)), True
 
3196
                       (None, target_exec))
3494
3197
            else:
3495
3198
                # Its a missing file, report it as such.
3496
3199
                return (entry[0][2],
3500
3203
                       (None, parent_id),
3501
3204
                       (None, self.utf8_decode(entry[0][1])[0]),
3502
3205
                       (None, None),
3503
 
                       (None, False)), True
 
3206
                       (None, False))
3504
3207
        elif source_minikind in 'fdlt' and target_minikind in 'a':
3505
3208
            # unversioned, possibly, or possibly not deleted: we dont care.
3506
3209
            # if its still on disk, *and* theres no other entry at this
3518
3221
                   (parent_id, None),
3519
3222
                   (self.utf8_decode(entry[0][1])[0], None),
3520
3223
                   (DirState._minikind_to_kind[source_minikind], None),
3521
 
                   (source_details[3], None)), True
 
3224
                   (source_details[3], None))
3522
3225
        elif source_minikind in 'fdlt' and target_minikind in 'r':
3523
3226
            # a rename; could be a true rename, or a rename inherited from
3524
3227
            # a renamed parent. TODO: handle this efficiently. Its not
3536
3239
                "source_minikind=%r, target_minikind=%r"
3537
3240
                % (source_minikind, target_minikind))
3538
3241
            ## import pdb;pdb.set_trace()
3539
 
        return None, None
 
3242
        return None
3540
3243
 
3541
3244
    def __iter__(self):
3542
3245
        return self
3543
3246
 
3544
 
    def _gather_result_for_consistency(self, result):
3545
 
        """Check a result we will yield to make sure we are consistent later.
3546
 
        
3547
 
        This gathers result's parents into a set to output later.
3548
 
 
3549
 
        :param result: A result tuple.
3550
 
        """
3551
 
        if not self.partial or not result[0]:
3552
 
            return
3553
 
        self.seen_ids.add(result[0])
3554
 
        new_path = result[1][1]
3555
 
        if new_path:
3556
 
            # Not the root and not a delete: queue up the parents of the path.
3557
 
            self.search_specific_file_parents.update(
3558
 
                osutils.parent_directories(new_path.encode('utf8')))
3559
 
            # Add the root directory which parent_directories does not
3560
 
            # provide.
3561
 
            self.search_specific_file_parents.add('')
3562
 
 
3563
3247
    def iter_changes(self):
3564
3248
        """Iterate over the changes."""
3565
3249
        utf8_decode = cache_utf8._utf8_decode
3566
3250
        _cmp_by_dirs = cmp_by_dirs
3567
3251
        _process_entry = self._process_entry
 
3252
        uninteresting = self.uninteresting
3568
3253
        search_specific_files = self.search_specific_files
3569
3254
        searched_specific_files = self.searched_specific_files
3570
3255
        splitpath = osutils.splitpath
3640
3325
                continue
3641
3326
            path_handled = False
3642
3327
            for entry in root_entries:
3643
 
                result, changed = _process_entry(entry, root_dir_info)
3644
 
                if changed is not None:
 
3328
                result = _process_entry(entry, root_dir_info)
 
3329
                if result is not None:
3645
3330
                    path_handled = True
3646
 
                    if changed:
3647
 
                        self._gather_result_for_consistency(result)
3648
 
                    if changed or self.include_unchanged:
 
3331
                    if result is not uninteresting:
3649
3332
                        yield result
3650
3333
            if self.want_unversioned and not path_handled and root_dir_info:
3651
3334
                new_executable = bool(
3761
3444
                        for current_entry in current_block[1]:
3762
3445
                            # entry referring to file not present on disk.
3763
3446
                            # advance the entry only, after processing.
3764
 
                            result, changed = _process_entry(current_entry, None)
3765
 
                            if changed is not None:
3766
 
                                if changed:
3767
 
                                    self._gather_result_for_consistency(result)
3768
 
                                if changed or self.include_unchanged:
 
3447
                            result = _process_entry(current_entry, None)
 
3448
                            if result is not None:
 
3449
                                if result is not uninteresting:
3769
3450
                                    yield result
3770
3451
                        block_index +=1
3771
3452
                        if (block_index < len(self.state._dirblocks) and
3801
3482
                        pass
3802
3483
                    elif current_path_info is None:
3803
3484
                        # no path is fine: the per entry code will handle it.
3804
 
                        result, changed = _process_entry(current_entry, current_path_info)
3805
 
                        if changed is not None:
3806
 
                            if changed:
3807
 
                                self._gather_result_for_consistency(result)
3808
 
                            if changed or self.include_unchanged:
 
3485
                        result = _process_entry(current_entry, current_path_info)
 
3486
                        if result is not None:
 
3487
                            if result is not uninteresting:
3809
3488
                                yield result
3810
3489
                    elif (current_entry[0][1] != current_path_info[1]
3811
3490
                          or current_entry[1][self.target_index][0] in 'ar'):
3824
3503
                        else:
3825
3504
                            # entry referring to file not present on disk.
3826
3505
                            # advance the entry only, after processing.
3827
 
                            result, changed = _process_entry(current_entry, None)
3828
 
                            if changed is not None:
3829
 
                                if changed:
3830
 
                                    self._gather_result_for_consistency(result)
3831
 
                                if changed or self.include_unchanged:
 
3506
                            result = _process_entry(current_entry, None)
 
3507
                            if result is not None:
 
3508
                                if result is not uninteresting:
3832
3509
                                    yield result
3833
3510
                            advance_path = False
3834
3511
                    else:
3835
 
                        result, changed = _process_entry(current_entry, current_path_info)
3836
 
                        if changed is not None:
 
3512
                        result = _process_entry(current_entry, current_path_info)
 
3513
                        if result is not None:
3837
3514
                            path_handled = True
3838
 
                            if changed:
3839
 
                                self._gather_result_for_consistency(result)
3840
 
                            if changed or self.include_unchanged:
 
3515
                            if result is not uninteresting:
3841
3516
                                yield result
3842
3517
                    if advance_entry and current_entry is not None:
3843
3518
                        entry_index += 1
3902
3577
                        current_dir_info = dir_iterator.next()
3903
3578
                    except StopIteration:
3904
3579
                        current_dir_info = None
3905
 
        for result in self._iter_specific_file_parents():
3906
 
            yield result
3907
 
 
3908
 
    def _iter_specific_file_parents(self):
3909
 
        """Iter over the specific file parents."""
3910
 
        while self.search_specific_file_parents:
3911
 
            # Process the parent directories for the paths we were iterating.
3912
 
            # Even in extremely large trees this should be modest, so currently
3913
 
            # no attempt is made to optimise.
3914
 
            path_utf8 = self.search_specific_file_parents.pop()
3915
 
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
3916
 
                # We've examined this path.
3917
 
                continue
3918
 
            if path_utf8 in self.searched_exact_paths:
3919
 
                # We've examined this path.
3920
 
                continue
3921
 
            path_entries = self.state._entries_for_path(path_utf8)
3922
 
            # We need either one or two entries. If the path in
3923
 
            # self.target_index has moved (so the entry in source_index is in
3924
 
            # 'ar') then we need to also look for the entry for this path in
3925
 
            # self.source_index, to output the appropriate delete-or-rename.
3926
 
            selected_entries = []
3927
 
            found_item = False
3928
 
            for candidate_entry in path_entries:
3929
 
                # Find entries present in target at this path:
3930
 
                if candidate_entry[1][self.target_index][0] not in 'ar':
3931
 
                    found_item = True
3932
 
                    selected_entries.append(candidate_entry)
3933
 
                # Find entries present in source at this path:
3934
 
                elif (self.source_index is not None and
3935
 
                    candidate_entry[1][self.source_index][0] not in 'ar'):
3936
 
                    found_item = True
3937
 
                    if candidate_entry[1][self.target_index][0] == 'a':
3938
 
                        # Deleted, emit it here.
3939
 
                        selected_entries.append(candidate_entry)
3940
 
                    else:
3941
 
                        # renamed, emit it when we process the directory it
3942
 
                        # ended up at.
3943
 
                        self.search_specific_file_parents.add(
3944
 
                            candidate_entry[1][self.target_index][1])
3945
 
            if not found_item:
3946
 
                raise AssertionError(
3947
 
                    "Missing entry for specific path parent %r, %r" % (
3948
 
                    path_utf8, path_entries))
3949
 
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
3950
 
            for entry in selected_entries:
3951
 
                if entry[0][2] in self.seen_ids:
3952
 
                    continue
3953
 
                result, changed = self._process_entry(entry, path_info)
3954
 
                if changed is None:
3955
 
                    raise AssertionError(
3956
 
                        "Got entry<->path mismatch for specific path "
3957
 
                        "%r entry %r path_info %r " % (
3958
 
                        path_utf8, entry, path_info))
3959
 
                # Only include changes - we're outside the users requested
3960
 
                # expansion.
3961
 
                if changed:
3962
 
                    self._gather_result_for_consistency(result)
3963
 
                    if (result[6][0] == 'directory' and
3964
 
                        result[6][1] != 'directory'):
3965
 
                        # This stopped being a directory, the old children have
3966
 
                        # to be included.
3967
 
                        if entry[1][self.source_index][0] == 'r':
3968
 
                            # renamed, take the source path
3969
 
                            entry_path_utf8 = entry[1][self.source_index][1]
3970
 
                        else:
3971
 
                            entry_path_utf8 = path_utf8
3972
 
                        initial_key = (entry_path_utf8, '', '')
3973
 
                        block_index, _ = self.state._find_block_index_from_key(
3974
 
                            initial_key)
3975
 
                        if block_index == 0:
3976
 
                            # The children of the root are in block index 1.
3977
 
                            block_index +=1
3978
 
                        current_block = None
3979
 
                        if block_index < len(self.state._dirblocks):
3980
 
                            current_block = self.state._dirblocks[block_index]
3981
 
                            if not osutils.is_inside(
3982
 
                                entry_path_utf8, current_block[0]):
3983
 
                                # No entries for this directory at all.
3984
 
                                current_block = None
3985
 
                        if current_block is not None:
3986
 
                            for entry in current_block[1]:
3987
 
                                if entry[1][self.source_index][0] in 'ar':
3988
 
                                    # Not in the source tree, so doesn't have to be
3989
 
                                    # included.
3990
 
                                    continue
3991
 
                                # Path of the entry itself.
3992
 
 
3993
 
                                self.search_specific_file_parents.add(
3994
 
                                    osutils.pathjoin(*entry[0][:2]))
3995
 
                if changed or self.include_unchanged:
3996
 
                    yield result
3997
 
            self.searched_exact_paths.add(path_utf8)
3998
 
 
3999
 
    def _path_info(self, utf8_path, unicode_path):
4000
 
        """Generate path_info for unicode_path.
4001
 
 
4002
 
        :return: None if unicode_path does not exist, or a path_info tuple.
4003
 
        """
4004
 
        abspath = self.tree.abspath(unicode_path)
4005
 
        try:
4006
 
            stat = os.lstat(abspath)
4007
 
        except OSError, e:
4008
 
            if e.errno == errno.ENOENT:
4009
 
                # the path does not exist.
4010
 
                return None
4011
 
            else:
4012
 
                raise
4013
 
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
4014
 
        dir_info = (utf8_path, utf8_basename,
4015
 
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
4016
 
            abspath)
4017
 
        if dir_info[2] == 'directory':
4018
 
            if self.tree._directory_is_tree_reference(
4019
 
                unicode_path):
4020
 
                self.root_dir_info = self.root_dir_info[:2] + \
4021
 
                    ('tree-reference',) + self.root_dir_info[3:]
4022
 
        return dir_info
 
3580
_process_entry = ProcessEntryPython
4023
3581
 
4024
3582
 
4025
3583
# Try to load the compiled form if possible
4026
3584
try:
4027
 
    from bzrlib._dirstate_helpers_pyx import (
4028
 
        _read_dirblocks,
4029
 
        bisect_dirblock,
4030
 
        _bisect_path_left,
4031
 
        _bisect_path_right,
4032
 
        cmp_by_dirs,
 
3585
    from bzrlib._dirstate_helpers_c import (
 
3586
        _read_dirblocks_c as _read_dirblocks,
 
3587
        bisect_dirblock_c as bisect_dirblock,
 
3588
        _bisect_path_left_c as _bisect_path_left,
 
3589
        _bisect_path_right_c as _bisect_path_right,
 
3590
        cmp_by_dirs_c as cmp_by_dirs,
4033
3591
        ProcessEntryC as _process_entry,
4034
3592
        update_entry as update_entry,
4035
3593
        )
4036
 
except ImportError, e:
4037
 
    osutils.failed_to_load_extension(e)
 
3594
except ImportError:
4038
3595
    from bzrlib._dirstate_helpers_py import (
4039
 
        _read_dirblocks,
4040
 
        bisect_dirblock,
4041
 
        _bisect_path_left,
4042
 
        _bisect_path_right,
4043
 
        cmp_by_dirs,
 
3596
        _read_dirblocks_py as _read_dirblocks,
 
3597
        bisect_dirblock_py as bisect_dirblock,
 
3598
        _bisect_path_left_py as _bisect_path_left,
 
3599
        _bisect_path_right_py as _bisect_path_right,
 
3600
        cmp_by_dirs_py as cmp_by_dirs,
4044
3601
        )
4045
 
    # FIXME: It would be nice to be able to track moved lines so that the
4046
 
    # corresponding python code can be moved to the _dirstate_helpers_py
4047
 
    # module. I don't want to break the history for this important piece of
4048
 
    # code so I left the code here -- vila 20090622
4049
 
    update_entry = py_update_entry
4050
 
    _process_entry = ProcessEntryPython