~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Vincent Ladeuil
  • Date: 2009-04-06 14:01:16 UTC
  • mto: (4241.10.1 bzr.1.14)
  • mto: This revision was merged to the branch mainline in revision 4267.
  • Revision ID: v.ladeuil+lp@free.fr-20090406140116-zkbvjtaxfhbql4l4
Fix unicode related OSX failures.

* bzrlib/tests/test_export.py: 
Fix imports.

* bzrlib/tests/blackbox/test_export.py:
(TestExport.test_tar_export_unicode,
TestExport.test_zip_export_unicode): Use a less problematic
unicode char for OSX.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2006-2010 Canonical Ltd
 
1
# Copyright (C) 2006, 2007, 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
187
187
the file on disk, and then immediately discard, the overhead of object creation
188
188
becomes a significant cost.
189
189
 
190
 
Figures: Creating a tuple from 3 elements was profiled at 0.0625
 
190
Figures: Creating a tuple from from 3 elements was profiled at 0.0625
191
191
microseconds, whereas creating a object which is subclassed from tuple was
192
192
0.500 microseconds, and creating an object with 3 elements and slots was 3
193
193
microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get
204
204
import bisect
205
205
import binascii
206
206
import errno
207
 
import operator
208
207
import os
209
208
from stat import S_IEXEC
210
209
import stat
220
219
    inventory,
221
220
    lock,
222
221
    osutils,
223
 
    static_tuple,
224
222
    trace,
225
223
    )
226
224
 
269
267
    """An interface for getting sha1s of a file."""
270
268
 
271
269
    def sha1(self, abspath):
272
 
        """Return the sha1 of a file given its absolute path.
273
 
 
274
 
        :param abspath:  May be a filesystem encoded absolute path
275
 
             or a unicode path.
276
 
        """
 
270
        """Return the sha1 of a file given its absolute path."""
277
271
        raise NotImplementedError(self.sha1)
278
272
 
279
273
    def stat_and_sha1(self, abspath):
280
274
        """Return the stat and sha1 of a file given its absolute path.
281
275
        
282
 
        :param abspath:  May be a filesystem encoded absolute path
283
 
             or a unicode path.
284
 
 
285
276
        Note: the stat should be the stat of the physical file
286
277
        while the sha may be the sha of its canonical content.
287
278
        """
549
540
           self._ensure_block(block_index, entry_index, utf8path)
550
541
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
551
542
        if self._id_index:
552
 
            self._add_to_id_index(self._id_index, entry_key)
 
543
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)
553
544
 
554
545
    def _bisect(self, paths):
555
546
        """Bisect through the disk structure for specific rows.
1279
1270
    def update_by_delta(self, delta):
1280
1271
        """Apply an inventory delta to the dirstate for tree 0
1281
1272
 
1282
 
        This is the workhorse for apply_inventory_delta in dirstate based
1283
 
        trees.
1284
 
 
1285
1273
        :param delta: An inventory delta.  See Inventory.apply_delta for
1286
1274
            details.
1287
1275
        """
1288
1276
        self._read_dirblocks_if_needed()
1289
 
        encode = cache_utf8.encode
1290
1277
        insertions = {}
1291
1278
        removals = {}
1292
 
        # Accumulate parent references (path_utf8, id), to check for parentless
1293
 
        # items or items placed under files/links/tree-references. We get
1294
 
        # references from every item in the delta that is not a deletion and
1295
 
        # is not itself the root.
1296
 
        parents = set()
1297
 
        # Added ids must not be in the dirstate already. This set holds those
1298
 
        # ids.
1299
 
        new_ids = set()
1300
 
        # This loop transforms the delta to single atomic operations that can
1301
 
        # be executed and validated.
1302
 
        for old_path, new_path, file_id, inv_entry in sorted(
1303
 
            inventory._check_delta_unique_old_paths(
1304
 
            inventory._check_delta_unique_new_paths(
1305
 
            inventory._check_delta_ids_match_entry(
1306
 
            inventory._check_delta_ids_are_valid(
1307
 
            inventory._check_delta_new_path_entry_both_or_None(delta))))),
1308
 
            reverse=True):
 
1279
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
1309
1280
            if (file_id in insertions) or (file_id in removals):
1310
 
                raise errors.InconsistentDelta(old_path or new_path, file_id,
1311
 
                    "repeated file_id")
 
1281
                raise AssertionError("repeated file id in delta %r" % (file_id,))
1312
1282
            if old_path is not None:
1313
1283
                old_path = old_path.encode('utf-8')
1314
1284
                removals[file_id] = old_path
1315
 
            else:
1316
 
                new_ids.add(file_id)
1317
1285
            if new_path is not None:
1318
 
                if inv_entry is None:
1319
 
                    raise errors.InconsistentDelta(new_path, file_id,
1320
 
                        "new_path with no entry")
1321
1286
                new_path = new_path.encode('utf-8')
1322
 
                dirname_utf8, basename = osutils.split(new_path)
1323
 
                if basename:
1324
 
                    parents.add((dirname_utf8, inv_entry.parent_id))
1325
 
                key = (dirname_utf8, basename, file_id)
 
1287
                dirname, basename = osutils.split(new_path)
 
1288
                key = (dirname, basename, file_id)
1326
1289
                minikind = DirState._kind_to_minikind[inv_entry.kind]
1327
1290
                if minikind == 't':
1328
 
                    fingerprint = inv_entry.reference_revision or ''
 
1291
                    fingerprint = inv_entry.reference_revision
1329
1292
                else:
1330
1293
                    fingerprint = ''
1331
1294
                insertions[file_id] = (key, minikind, inv_entry.executable,
1340
1303
                    minikind = child[1][0][0]
1341
1304
                    fingerprint = child[1][0][4]
1342
1305
                    executable = child[1][0][3]
1343
 
                    old_child_path = osutils.pathjoin(child_dirname,
1344
 
                                                      child_basename)
 
1306
                    old_child_path = osutils.pathjoin(child[0][0],
 
1307
                                                      child[0][1])
1345
1308
                    removals[child[0][2]] = old_child_path
1346
1309
                    child_suffix = child_dirname[len(old_path):]
1347
1310
                    new_child_dirname = (new_path + child_suffix)
1348
1311
                    key = (new_child_dirname, child_basename, child[0][2])
1349
 
                    new_child_path = osutils.pathjoin(new_child_dirname,
1350
 
                                                      child_basename)
 
1312
                    new_child_path = os.path.join(new_child_dirname,
 
1313
                                                  child_basename)
1351
1314
                    insertions[child[0][2]] = (key, minikind, executable,
1352
1315
                                               fingerprint, new_child_path)
1353
 
        self._check_delta_ids_absent(new_ids, delta, 0)
1354
 
        try:
1355
 
            self._apply_removals(removals.iteritems())
1356
 
            self._apply_insertions(insertions.values())
1357
 
            # Validate parents
1358
 
            self._after_delta_check_parents(parents, 0)
1359
 
        except errors.BzrError, e:
1360
 
            self._changes_aborted = True
1361
 
            if 'integrity error' not in str(e):
1362
 
                raise
1363
 
            # _get_entry raises BzrError when a request is inconsistent; we
1364
 
            # want such errors to be shown as InconsistentDelta - and that 
1365
 
            # fits the behaviour we trigger.
1366
 
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
 
1316
        self._apply_removals(removals.values())
 
1317
        self._apply_insertions(insertions.values())
1367
1318
 
1368
1319
    def _apply_removals(self, removals):
1369
 
        for file_id, path in sorted(removals, reverse=True,
1370
 
            key=operator.itemgetter(1)):
 
1320
        for path in sorted(removals, reverse=True):
1371
1321
            dirname, basename = osutils.split(path)
1372
1322
            block_i, entry_i, d_present, f_present = \
1373
1323
                self._get_block_entry_index(dirname, basename, 0)
1374
 
            try:
1375
 
                entry = self._dirblocks[block_i][1][entry_i]
1376
 
            except IndexError:
1377
 
                self._changes_aborted = True
1378
 
                raise errors.InconsistentDelta(path, file_id,
1379
 
                    "Wrong path for old path.")
1380
 
            if not f_present or entry[1][0][0] in 'ar':
1381
 
                self._changes_aborted = True
1382
 
                raise errors.InconsistentDelta(path, file_id,
1383
 
                    "Wrong path for old path.")
1384
 
            if file_id != entry[0][2]:
1385
 
                self._changes_aborted = True
1386
 
                raise errors.InconsistentDelta(path, file_id,
1387
 
                    "Attempt to remove path has wrong id - found %r."
1388
 
                    % entry[0][2])
 
1324
            entry = self._dirblocks[block_i][1][entry_i]
1389
1325
            self._make_absent(entry)
1390
1326
            # See if we have a malformed delta: deleting a directory must not
1391
1327
            # leave crud behind. This increases the number of bisects needed
1399
1335
                # be due to it being in a parent tree, or a corrupt delta.
1400
1336
                for child_entry in self._dirblocks[block_i][1]:
1401
1337
                    if child_entry[1][0][0] not in ('r', 'a'):
1402
 
                        self._changes_aborted = True
1403
1338
                        raise errors.InconsistentDelta(path, entry[0][2],
1404
1339
                            "The file id was deleted but its children were "
1405
1340
                            "not deleted.")
1406
1341
 
1407
1342
    def _apply_insertions(self, adds):
1408
 
        try:
1409
 
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1410
 
                self.update_minimal(key, minikind, executable, fingerprint,
1411
 
                                    path_utf8=path_utf8)
1412
 
        except errors.NotVersionedError:
1413
 
            self._changes_aborted = True
1414
 
            raise errors.InconsistentDelta(path_utf8.decode('utf8'), key[2],
1415
 
                "Missing parent")
 
1343
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
 
1344
            self.update_minimal(key, minikind, executable, fingerprint,
 
1345
                                path_utf8=path_utf8)
1416
1346
 
1417
1347
    def update_basis_by_delta(self, delta, new_revid):
1418
1348
        """Update the parents of this tree after a commit.
1462
1392
        # At the same time, to reduce interface friction we convert the input
1463
1393
        # inventory entries to dirstate.
1464
1394
        root_only = ('', '')
1465
 
        # Accumulate parent references (path_utf8, id), to check for parentless
1466
 
        # items or items placed under files/links/tree-references. We get
1467
 
        # references from every item in the delta that is not a deletion and
1468
 
        # is not itself the root.
1469
 
        parents = set()
1470
 
        # Added ids must not be in the dirstate already. This set holds those
1471
 
        # ids.
1472
 
        new_ids = set()
1473
1395
        for old_path, new_path, file_id, inv_entry in delta:
1474
 
            if inv_entry is not None and file_id != inv_entry.file_id:
1475
 
                raise errors.InconsistentDelta(new_path, file_id,
1476
 
                    "mismatched entry file_id %r" % inv_entry)
1477
 
            if new_path is not None:
1478
 
                if inv_entry is None:
1479
 
                    raise errors.InconsistentDelta(new_path, file_id,
1480
 
                        "new_path with no entry")
1481
 
                new_path_utf8 = encode(new_path)
1482
 
                # note the parent for validation
1483
 
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
1484
 
                if basename_utf8:
1485
 
                    parents.add((dirname_utf8, inv_entry.parent_id))
1486
1396
            if old_path is None:
1487
1397
                adds.append((None, encode(new_path), file_id,
1488
1398
                    inv_to_entry(inv_entry), True))
1489
 
                new_ids.add(file_id)
1490
1399
            elif new_path is None:
1491
1400
                deletes.append((encode(old_path), None, file_id, None, True))
1492
1401
            elif (old_path, new_path) != root_only:
1504
1413
                # for 'r' items on every pass.
1505
1414
                self._update_basis_apply_deletes(deletes)
1506
1415
                deletes = []
 
1416
                new_path_utf8 = encode(new_path)
1507
1417
                # Split into an add/delete pair recursively.
1508
1418
                adds.append((None, new_path_utf8, file_id,
1509
1419
                    inv_to_entry(inv_entry), False))
1535
1445
                # of everything.
1536
1446
                changes.append((encode(old_path), encode(new_path), file_id,
1537
1447
                    inv_to_entry(inv_entry)))
1538
 
        self._check_delta_ids_absent(new_ids, delta, 1)
1539
 
        try:
1540
 
            # Finish expunging deletes/first half of renames.
1541
 
            self._update_basis_apply_deletes(deletes)
1542
 
            # Reinstate second half of renames and new paths.
1543
 
            self._update_basis_apply_adds(adds)
1544
 
            # Apply in-situ changes.
1545
 
            self._update_basis_apply_changes(changes)
1546
 
            # Validate parents
1547
 
            self._after_delta_check_parents(parents, 1)
1548
 
        except errors.BzrError, e:
1549
 
            self._changes_aborted = True
1550
 
            if 'integrity error' not in str(e):
1551
 
                raise
1552
 
            # _get_entry raises BzrError when a request is inconsistent; we
1553
 
            # want such errors to be shown as InconsistentDelta - and that 
1554
 
            # fits the behaviour we trigger. Partof this is driven by dirstate
1555
 
            # only supporting deltas that turn the basis into a closer fit to
1556
 
            # the active tree.
1557
 
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
 
1448
 
 
1449
        # Finish expunging deletes/first half of renames.
 
1450
        self._update_basis_apply_deletes(deletes)
 
1451
        # Reinstate second half of renames and new paths.
 
1452
        self._update_basis_apply_adds(adds)
 
1453
        # Apply in-situ changes.
 
1454
        self._update_basis_apply_changes(changes)
1558
1455
 
1559
1456
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1560
1457
        self._header_state = DirState.IN_MEMORY_MODIFIED
1561
1458
        self._id_index = None
1562
1459
        return
1563
1460
 
1564
 
    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
1565
 
        """Check that none of the file_ids in new_ids are present in a tree."""
1566
 
        if not new_ids:
1567
 
            return
1568
 
        id_index = self._get_id_index()
1569
 
        for file_id in new_ids:
1570
 
            for key in id_index.get(file_id, ()):
1571
 
                block_i, entry_i, d_present, f_present = \
1572
 
                    self._get_block_entry_index(key[0], key[1], tree_index)
1573
 
                if not f_present:
1574
 
                    # In a different tree
1575
 
                    continue
1576
 
                entry = self._dirblocks[block_i][1][entry_i]
1577
 
                if entry[0][2] != file_id:
1578
 
                    # Different file_id, so not what we want.
1579
 
                    continue
1580
 
                # NB: No changes made before this helper is called, so no need
1581
 
                # to set the _changes_aborted flag.
1582
 
                raise errors.InconsistentDelta(
1583
 
                    ("%s/%s" % key[0:2]).decode('utf8'), file_id,
1584
 
                    "This file_id is new in the delta but already present in "
1585
 
                    "the target")
1586
 
 
1587
1461
    def _update_basis_apply_adds(self, adds):
1588
1462
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.
1589
1463
 
1654
1528
        null = DirState.NULL_PARENT_DETAILS
1655
1529
        for old_path, new_path, file_id, _, real_delete in deletes:
1656
1530
            if real_delete != (new_path is None):
1657
 
                self._changes_aborted = True
1658
1531
                raise AssertionError("bad delete delta")
1659
1532
            # the entry for this file_id must be in tree 1.
1660
1533
            dirname, basename = osutils.split(old_path)
1693
1566
                    # it is being resurrected here, so blank it out temporarily.
1694
1567
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1695
1568
 
1696
 
    def _after_delta_check_parents(self, parents, index):
1697
 
        """Check that parents required by the delta are all intact.
1698
 
        
1699
 
        :param parents: An iterable of (path_utf8, file_id) tuples which are
1700
 
            required to be present in tree 'index' at path_utf8 with id file_id
1701
 
            and be a directory.
1702
 
        :param index: The column in the dirstate to check for parents in.
1703
 
        """
1704
 
        for dirname_utf8, file_id in parents:
1705
 
            # Get the entry - the ensures that file_id, dirname_utf8 exists and
1706
 
            # has the right file id.
1707
 
            entry = self._get_entry(index, file_id, dirname_utf8)
1708
 
            if entry[1] is None:
1709
 
                self._changes_aborted = True
1710
 
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
1711
 
                    file_id, "This parent is not present.")
1712
 
            # Parents of things must be directories
1713
 
            if entry[1][index][0] != 'd':
1714
 
                self._changes_aborted = True
1715
 
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
1716
 
                    file_id, "This parent is not a directory.")
1717
 
 
1718
1569
    def _observed_sha1(self, entry, sha1, stat_value,
1719
1570
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1720
1571
        """Note the sha1 of a file.
1963
1814
        self._read_dirblocks_if_needed()
1964
1815
        if path_utf8 is not None:
1965
1816
            if type(path_utf8) is not str:
1966
 
                raise errors.BzrError('path_utf8 is not a str: %s %r'
 
1817
                raise AssertionError('path_utf8 is not a str: %s %s'
1967
1818
                    % (type(path_utf8), path_utf8))
1968
1819
            # path lookups are faster
1969
1820
            dirname, basename = osutils.split(path_utf8)
1981
1832
                                          ' tree_index, file_id and path')
1982
1833
            return entry
1983
1834
        else:
1984
 
            possible_keys = self._get_id_index().get(fileid_utf8, ())
 
1835
            possible_keys = self._get_id_index().get(fileid_utf8, None)
1985
1836
            if not possible_keys:
1986
1837
                return None, None
1987
1838
            for key in possible_keys:
1998
1849
                entry_index, present = self._find_entry_index(key, block)
1999
1850
                if present:
2000
1851
                    entry = self._dirblocks[block_index][1][entry_index]
2001
 
                    # TODO: We might want to assert that entry[0][2] ==
2002
 
                    #       fileid_utf8.
2003
1852
                    if entry[1][tree_index][0] in 'fdlt':
2004
1853
                        # this is the result we are looking for: the
2005
1854
                        # real home of this file_id in this tree.
2144
1993
                yield entry
2145
1994
 
2146
1995
    def _get_id_index(self):
2147
 
        """Get an id index of self._dirblocks.
2148
 
        
2149
 
        This maps from file_id => [(directory, name, file_id)] entries where
2150
 
        that file_id appears in one of the trees.
2151
 
        """
 
1996
        """Get an id index of self._dirblocks."""
2152
1997
        if self._id_index is None:
2153
1998
            id_index = {}
2154
1999
            for key, tree_details in self._iter_entries():
2155
 
                self._add_to_id_index(id_index, key)
 
2000
                id_index.setdefault(key[2], set()).add(key)
2156
2001
            self._id_index = id_index
2157
2002
        return self._id_index
2158
2003
 
2159
 
    def _add_to_id_index(self, id_index, entry_key):
2160
 
        """Add this entry to the _id_index mapping."""
2161
 
        # This code used to use a set for every entry in the id_index. However,
2162
 
        # it is *rare* to have more than one entry. So a set is a large
2163
 
        # overkill. And even when we do, we won't ever have more than the
2164
 
        # number of parent trees. Which is still a small number (rarely >2). As
2165
 
        # such, we use a simple tuple, and do our own uniqueness checks. While
2166
 
        # the 'in' check is O(N) since N is nicely bounded it shouldn't ever
2167
 
        # cause quadratic failure.
2168
 
        # TODO: This should use StaticTuple
2169
 
        file_id = entry_key[2]
2170
 
        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
2171
 
        if file_id not in id_index:
2172
 
            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
2173
 
        else:
2174
 
            entry_keys = id_index[file_id]
2175
 
            if entry_key not in entry_keys:
2176
 
                id_index[file_id] = entry_keys + (entry_key,)
2177
 
 
2178
 
    def _remove_from_id_index(self, id_index, entry_key):
2179
 
        """Remove this entry from the _id_index mapping.
2180
 
 
2181
 
        It is an programming error to call this when the entry_key is not
2182
 
        already present.
2183
 
        """
2184
 
        file_id = entry_key[2]
2185
 
        entry_keys = list(id_index[file_id])
2186
 
        entry_keys.remove(entry_key)
2187
 
        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
2188
 
 
2189
2004
    def _get_output_lines(self, lines):
2190
2005
        """Format lines for final output.
2191
2006
 
2213
2028
 
2214
2029
    @staticmethod
2215
2030
    def on_file(path, sha1_provider=None):
2216
 
        """Construct a DirState on the file at path "path".
 
2031
        """Construct a DirState on the file at path path.
2217
2032
 
2218
2033
        :param path: The path at which the dirstate file on disk should live.
2219
2034
        :param sha1_provider: an object meeting the SHA1Provider interface.
2391
2206
        self.update_minimal(('', '', new_id), 'd',
2392
2207
            path_utf8='', packed_stat=entry[1][0][4])
2393
2208
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2209
        if self._id_index is not None:
 
2210
            self._id_index.setdefault(new_id, set()).add(entry[0])
2394
2211
 
2395
2212
    def set_parent_trees(self, trees, ghosts):
2396
2213
        """Set the parent trees for the dirstate.
2449
2266
                continue
2450
2267
            by_path[entry[0]] = [entry[1][0]] + \
2451
2268
                [DirState.NULL_PARENT_DETAILS] * parent_count
2452
 
            # TODO: Possibly inline this, since we know it isn't present yet
2453
 
            #       id_index[entry[0][2]] = (entry[0],)
2454
 
            self._add_to_id_index(id_index, entry[0])
 
2269
            id_index[entry[0][2]] = set([entry[0]])
2455
2270
 
2456
2271
        # now the parent trees:
2457
2272
        for tree_index, tree in enumerate(parent_trees):
2479
2294
                new_entry_key = (dirname, basename, file_id)
2480
2295
                # tree index consistency: All other paths for this id in this tree
2481
2296
                # index must point to the correct path.
2482
 
                for entry_key in id_index.get(file_id, ()):
 
2297
                for entry_key in id_index.setdefault(file_id, set()):
2483
2298
                    # TODO:PROFILING: It might be faster to just update
2484
2299
                    # rather than checking if we need to, and then overwrite
2485
2300
                    # the one we are located at.
2491
2306
                        by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')
2492
2307
                # by path consistency: Insert into an existing path record (trivial), or
2493
2308
                # add a new one with relocation pointers for the other tree indexes.
2494
 
                entry_keys = id_index.get(file_id, ())
2495
 
                if new_entry_key in entry_keys:
 
2309
                if new_entry_key in id_index[file_id]:
2496
2310
                    # there is already an entry where this data belongs, just insert it.
2497
2311
                    by_path[new_entry_key][tree_index] = \
2498
2312
                        self._inv_entry_to_details(entry)
2503
2317
                    new_details = []
2504
2318
                    for lookup_index in xrange(tree_index):
2505
2319
                        # boundary case: this is the first occurence of file_id
2506
 
                        # so there are no id_indexes, possibly take this out of
 
2320
                        # so there are no id_indexs, possibly take this out of
2507
2321
                        # the loop?
2508
 
                        if not len(entry_keys):
 
2322
                        if not len(id_index[file_id]):
2509
2323
                            new_details.append(DirState.NULL_PARENT_DETAILS)
2510
2324
                        else:
2511
2325
                            # grab any one entry, use it to find the right path.
2512
2326
                            # TODO: optimise this to reduce memory use in highly
2513
2327
                            # fragmented situations by reusing the relocation
2514
2328
                            # records.
2515
 
                            a_key = iter(entry_keys).next()
 
2329
                            a_key = iter(id_index[file_id]).next()
2516
2330
                            if by_path[a_key][lookup_index][0] in ('r', 'a'):
2517
2331
                                # its a pointer or missing statement, use it as is.
2518
2332
                                new_details.append(by_path[a_key][lookup_index])
2523
2337
                    new_details.append(self._inv_entry_to_details(entry))
2524
2338
                    new_details.extend(new_location_suffix)
2525
2339
                    by_path[new_entry_key] = new_details
2526
 
                    self._add_to_id_index(id_index, new_entry_key)
 
2340
                    id_index[file_id].add(new_entry_key)
2527
2341
        # --- end generation of full tree mappings
2528
2342
 
2529
2343
        # sort and output all the entries
2558
2372
        if 'evil' in debug.debug_flags:
2559
2373
            trace.mutter_callsite(1,
2560
2374
                "set_state_from_inventory called; please mutate the tree instead")
2561
 
        tracing = 'dirstate' in debug.debug_flags
2562
 
        if tracing:
2563
 
            trace.mutter("set_state_from_inventory trace:")
2564
2375
        self._read_dirblocks_if_needed()
2565
2376
        # sketch:
2566
2377
        # Two iterators: current data and new data, both in dirblock order.
2575
2386
        new_iterator = new_inv.iter_entries_by_dir()
2576
2387
        # we will be modifying the dirstate, so we need a stable iterator. In
2577
2388
        # future we might write one, for now we just clone the state into a
2578
 
        # list using a copy so that we see every original item and don't have
2579
 
        # to adjust the position when items are inserted or deleted in the
2580
 
        # underlying dirstate.
 
2389
        # list - which is a shallow copy.
2581
2390
        old_iterator = iter(list(self._iter_entries()))
2582
2391
        # both must have roots so this is safe:
2583
2392
        current_new = new_iterator.next()
2617
2426
            # we make both end conditions explicit
2618
2427
            if not current_old:
2619
2428
                # old is finished: insert current_new into the state.
2620
 
                if tracing:
2621
 
                    trace.mutter("Appending from new '%s'.",
2622
 
                        new_path_utf8.decode('utf8'))
2623
2429
                self.update_minimal(new_entry_key, current_new_minikind,
2624
2430
                    executable=current_new[1].executable,
2625
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
2626
 
                    fullscan=True)
 
2431
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
2627
2432
                current_new = advance(new_iterator)
2628
2433
            elif not current_new:
2629
2434
                # new is finished
2630
 
                if tracing:
2631
 
                    trace.mutter("Truncating from old '%s/%s'.",
2632
 
                        current_old[0][0].decode('utf8'),
2633
 
                        current_old[0][1].decode('utf8'))
2634
2435
                self._make_absent(current_old)
2635
2436
                current_old = advance(old_iterator)
2636
2437
            elif new_entry_key == current_old[0]:
2643
2444
                # kind has changed.
2644
2445
                if (current_old[1][0][3] != current_new[1].executable or
2645
2446
                    current_old[1][0][0] != current_new_minikind):
2646
 
                    if tracing:
2647
 
                        trace.mutter("Updating in-place change '%s'.",
2648
 
                            new_path_utf8.decode('utf8'))
2649
2447
                    self.update_minimal(current_old[0], current_new_minikind,
2650
2448
                        executable=current_new[1].executable,
2651
 
                        path_utf8=new_path_utf8, fingerprint=fingerprint,
2652
 
                        fullscan=True)
 
2449
                        path_utf8=new_path_utf8, fingerprint=fingerprint)
2653
2450
                # both sides are dealt with, move on
2654
2451
                current_old = advance(old_iterator)
2655
2452
                current_new = advance(new_iterator)
2658
2455
                      and new_entry_key[1:] < current_old[0][1:])):
2659
2456
                # new comes before:
2660
2457
                # add a entry for this and advance new
2661
 
                if tracing:
2662
 
                    trace.mutter("Inserting from new '%s'.",
2663
 
                        new_path_utf8.decode('utf8'))
2664
2458
                self.update_minimal(new_entry_key, current_new_minikind,
2665
2459
                    executable=current_new[1].executable,
2666
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
2667
 
                    fullscan=True)
 
2460
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
2668
2461
                current_new = advance(new_iterator)
2669
2462
            else:
2670
2463
                # we've advanced past the place where the old key would be,
2671
2464
                # without seeing it in the new list.  so it must be gone.
2672
 
                if tracing:
2673
 
                    trace.mutter("Deleting from old '%s/%s'.",
2674
 
                        current_old[0][0].decode('utf8'),
2675
 
                        current_old[0][1].decode('utf8'))
2676
2465
                self._make_absent(current_old)
2677
2466
                current_old = advance(old_iterator)
2678
2467
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2679
2468
        self._id_index = None
2680
2469
        self._packed_stat_index = None
2681
 
        if tracing:
2682
 
            trace.mutter("set_state_from_inventory complete.")
2683
2470
 
2684
2471
    def _make_absent(self, current_old):
2685
2472
        """Mark current_old - an entry - as absent for tree 0.
2711
2498
            block[1].pop(entry_index)
2712
2499
            # if we have an id_index in use, remove this key from it for this id.
2713
2500
            if self._id_index is not None:
2714
 
                self._remove_from_id_index(self._id_index, current_old[0])
 
2501
                self._id_index[current_old[0][2]].remove(current_old[0])
2715
2502
        # update all remaining keys for this id to record it as absent. The
2716
2503
        # existing details may either be the record we are marking as deleted
2717
2504
        # (if there were other trees with the id present at this path), or may
2734
2521
        return last_reference
2735
2522
 
2736
2523
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
2737
 
        packed_stat=None, size=0, path_utf8=None, fullscan=False):
 
2524
                       packed_stat=None, size=0, path_utf8=None):
2738
2525
        """Update an entry to the state in tree 0.
2739
2526
 
2740
2527
        This will either create a new entry at 'key' or update an existing one.
2751
2538
        :param size: Size information for new entry
2752
2539
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
2753
2540
                extra computation.
2754
 
        :param fullscan: If True then a complete scan of the dirstate is being
2755
 
            done and checking for duplicate rows should not be done. This
2756
 
            should only be set by set_state_from_inventory and similar methods.
2757
2541
 
2758
2542
        If packed_stat and fingerprint are not given, they're invalidated in
2759
2543
        the entry.
2768
2552
        new_details = (minikind, fingerprint, size, executable, packed_stat)
2769
2553
        id_index = self._get_id_index()
2770
2554
        if not present:
2771
 
            # New record. Check there isn't a entry at this path already.
2772
 
            if not fullscan:
2773
 
                low_index, _ = self._find_entry_index(key[0:2] + ('',), block)
2774
 
                while low_index < len(block):
2775
 
                    entry = block[low_index]
2776
 
                    if entry[0][0:2] == key[0:2]:
2777
 
                        if entry[1][0][0] not in 'ar':
2778
 
                            # This entry has the same path (but a different id) as
2779
 
                            # the new entry we're adding, and is present in ths
2780
 
                            # tree.
2781
 
                            raise errors.InconsistentDelta(
2782
 
                                ("%s/%s" % key[0:2]).decode('utf8'), key[2],
2783
 
                                "Attempt to add item at path already occupied by "
2784
 
                                "id %r" % entry[0][2])
2785
 
                        low_index += 1
2786
 
                    else:
2787
 
                        break
2788
2555
            # new entry, synthesis cross reference here,
2789
 
            existing_keys = id_index.get(key[2], ())
 
2556
            existing_keys = id_index.setdefault(key[2], set())
2790
2557
            if not existing_keys:
2791
2558
                # not currently in the state, simplest case
2792
2559
                new_entry = key, [new_details] + self._empty_parent_info()
2795
2562
                # grab one of them and use it to generate parent
2796
2563
                # relocation/absent entries.
2797
2564
                new_entry = key, [new_details]
2798
 
                # existing_keys can be changed as we iterate.
2799
 
                for other_key in tuple(existing_keys):
 
2565
                for other_key in existing_keys:
2800
2566
                    # change the record at other to be a pointer to this new
2801
2567
                    # record. The loop looks similar to the change to
2802
2568
                    # relocations when updating an existing record but its not:
2803
2569
                    # the test for existing kinds is different: this can be
2804
2570
                    # factored out to a helper though.
2805
 
                    other_block_index, present = self._find_block_index_from_key(
2806
 
                        other_key)
2807
 
                    if not present:
2808
 
                        raise AssertionError('could not find block for %s' % (
2809
 
                            other_key,))
2810
 
                    other_block = self._dirblocks[other_block_index][1]
2811
 
                    other_entry_index, present = self._find_entry_index(
2812
 
                        other_key, other_block)
2813
 
                    if not present:
2814
 
                        raise AssertionError(
2815
 
                            'update_minimal: could not find other entry for %s'
2816
 
                            % (other_key,))
 
2571
                    other_block_index, present = self._find_block_index_from_key(other_key)
 
2572
                    if not present:
 
2573
                        raise AssertionError('could not find block for %s' % (other_key,))
 
2574
                    other_entry_index, present = self._find_entry_index(other_key,
 
2575
                                            self._dirblocks[other_block_index][1])
 
2576
                    if not present:
 
2577
                        raise AssertionError('could not find entry for %s' % (other_key,))
2817
2578
                    if path_utf8 is None:
2818
2579
                        raise AssertionError('no path')
2819
 
                    # Turn this other location into a reference to the new
2820
 
                    # location. This also updates the aliased iterator
2821
 
                    # (current_old in set_state_from_inventory) so that the old
2822
 
                    # entry, if not already examined, is skipped over by that
2823
 
                    # loop.
2824
 
                    other_entry = other_block[other_entry_index]
2825
 
                    other_entry[1][0] = ('r', path_utf8, 0, False, '')
2826
 
                    if self._maybe_remove_row(other_block, other_entry_index,
2827
 
                                              id_index):
2828
 
                        # If the row holding this was removed, we need to
2829
 
                        # recompute where this entry goes
2830
 
                        entry_index, _ = self._find_entry_index(key, block)
 
2580
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
 
2581
                        ('r', path_utf8, 0, False, '')
2831
2582
 
2832
 
                # This loop:
2833
 
                # adds a tuple to the new details for each column
2834
 
                #  - either by copying an existing relocation pointer inside that column
2835
 
                #  - or by creating a new pointer to the right row inside that column
2836
2583
                num_present_parents = self._num_present_parents()
2837
 
                if num_present_parents:
2838
 
                    # TODO: This re-evaluates the existing_keys set, do we need
2839
 
                    #       to do that ourselves?
2840
 
                    other_key = list(existing_keys)[0]
2841
2584
                for lookup_index in xrange(1, num_present_parents + 1):
2842
2585
                    # grab any one entry, use it to find the right path.
2843
2586
                    # TODO: optimise this to reduce memory use in highly
2850
2593
                    update_entry_index, present = \
2851
2594
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
2852
2595
                    if not present:
2853
 
                        raise AssertionError('update_minimal: could not find entry for %s' % (other_key,))
 
2596
                        raise AssertionError('could not find entry for %s' % (other_key,))
2854
2597
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
2855
2598
                    if update_details[0] in 'ar': # relocated, absent
2856
2599
                        # its a pointer or absent in lookup_index's tree, use
2861
2604
                        pointer_path = osutils.pathjoin(*other_key[0:2])
2862
2605
                        new_entry[1].append(('r', pointer_path, 0, False, ''))
2863
2606
            block.insert(entry_index, new_entry)
2864
 
            self._add_to_id_index(id_index, key)
 
2607
            existing_keys.add(key)
2865
2608
        else:
2866
2609
            # Does the new state matter?
2867
2610
            block[entry_index][1][0] = new_details
2876
2619
            # converted to relocated.
2877
2620
            if path_utf8 is None:
2878
2621
                raise AssertionError('no path')
2879
 
            existing_keys = id_index.get(key[2], ())
2880
 
            if key not in existing_keys:
2881
 
                raise AssertionError('We found the entry in the blocks, but'
2882
 
                    ' the key is not in the id_index.'
2883
 
                    ' key: %s, existing_keys: %s' % (key, existing_keys))
2884
 
            for entry_key in existing_keys:
 
2622
            for entry_key in id_index.setdefault(key[2], set()):
2885
2623
                # TODO:PROFILING: It might be faster to just update
2886
2624
                # rather than checking if we need to, and then overwrite
2887
2625
                # the one we are located at.
2907
2645
 
2908
2646
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2909
2647
 
2910
 
    def _maybe_remove_row(self, block, index, id_index):
2911
 
        """Remove index if it is absent or relocated across the row.
2912
 
        
2913
 
        id_index is updated accordingly.
2914
 
        :return: True if we removed the row, False otherwise
2915
 
        """
2916
 
        present_in_row = False
2917
 
        entry = block[index]
2918
 
        for column in entry[1]:
2919
 
            if column[0] not in 'ar':
2920
 
                present_in_row = True
2921
 
                break
2922
 
        if not present_in_row:
2923
 
            block.pop(index)
2924
 
            self._remove_from_id_index(id_index, entry[0])
2925
 
            return True
2926
 
        return False
2927
 
 
2928
2648
    def _validate(self):
2929
2649
        """Check that invariants on the dirblock are correct.
2930
2650
 
3064
2784
            if absent_positions == tree_count:
3065
2785
                raise AssertionError(
3066
2786
                    "entry %r has no data for any tree." % (entry,))
3067
 
        if self._id_index is not None:
3068
 
            for file_id, entry_keys in self._id_index.iteritems():
3069
 
                for entry_key in entry_keys:
3070
 
                    if entry_key[2] != file_id:
3071
 
                        raise AssertionError(
3072
 
                            'file_id %r did not match entry key %s'
3073
 
                            % (file_id, entry_key))
3074
 
                if len(entry_keys) != len(set(entry_keys)):
3075
 
                    raise AssertionError(
3076
 
                        'id_index contained non-unique data for %s'
3077
 
                        % (entry_keys,))
3078
2787
 
3079
2788
    def _wipe_state(self):
3080
2789
        """Forget all state information about the dirstate."""
3222
2931
                           False, DirState.NULLSTAT)
3223
2932
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
3224
2933
    return link_or_sha1
 
2934
update_entry = py_update_entry
3225
2935
 
3226
2936
 
3227
2937
class ProcessEntryPython(object):
3228
2938
 
3229
 
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
 
2939
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
3230
2940
        "last_source_parent", "last_target_parent", "include_unchanged",
3231
 
        "partial", "use_filesystem_for_exec", "utf8_decode",
3232
 
        "searched_specific_files", "search_specific_files",
3233
 
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
3234
 
        "state", "source_index", "target_index", "want_unversioned", "tree"]
 
2941
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
 
2942
        "search_specific_files", "state", "source_index", "target_index",
 
2943
        "want_unversioned", "tree"]
3235
2944
 
3236
2945
    def __init__(self, include_unchanged, use_filesystem_for_exec,
3237
2946
        search_specific_files, state, source_index, target_index,
3238
2947
        want_unversioned, tree):
3239
2948
        self.old_dirname_to_file_id = {}
3240
2949
        self.new_dirname_to_file_id = {}
3241
 
        # Are we doing a partial iter_changes?
3242
 
        self.partial = search_specific_files != set([''])
 
2950
        # Just a sentry, so that _process_entry can say that this
 
2951
        # record is handled, but isn't interesting to process (unchanged)
 
2952
        self.uninteresting = object()
3243
2953
        # Using a list so that we can access the values and change them in
3244
2954
        # nested scope. Each one is [path, file_id, entry]
3245
2955
        self.last_source_parent = [None, None]
3248
2958
        self.use_filesystem_for_exec = use_filesystem_for_exec
3249
2959
        self.utf8_decode = cache_utf8._utf8_decode
3250
2960
        # for all search_indexs in each path at or under each element of
3251
 
        # search_specific_files, if the detail is relocated: add the id, and
3252
 
        # add the relocated path as one to search if its not searched already.
3253
 
        # If the detail is not relocated, add the id.
 
2961
        # search_specific_files, if the detail is relocated: add the id, and add the
 
2962
        # relocated path as one to search if its not searched already. If the
 
2963
        # detail is not relocated, add the id.
3254
2964
        self.searched_specific_files = set()
3255
 
        # When we search exact paths without expanding downwards, we record
3256
 
        # that here.
3257
 
        self.searched_exact_paths = set()
3258
2965
        self.search_specific_files = search_specific_files
3259
 
        # The parents up to the root of the paths we are searching.
3260
 
        # After all normal paths are returned, these specific items are returned.
3261
 
        self.search_specific_file_parents = set()
3262
 
        # The ids we've sent out in the delta.
3263
 
        self.seen_ids = set()
3264
2966
        self.state = state
3265
2967
        self.source_index = source_index
3266
2968
        self.target_index = target_index
3267
 
        if target_index != 0:
3268
 
            # A lot of code in here depends on target_index == 0
3269
 
            raise errors.BzrError('unsupported target index')
3270
2969
        self.want_unversioned = want_unversioned
3271
2970
        self.tree = tree
3272
2971
 
3274
2973
        """Compare an entry and real disk to generate delta information.
3275
2974
 
3276
2975
        :param path_info: top_relpath, basename, kind, lstat, abspath for
3277
 
            the path of entry. If None, then the path is considered absent in 
3278
 
            the target (Perhaps we should pass in a concrete entry for this ?)
 
2976
            the path of entry. If None, then the path is considered absent.
 
2977
            (Perhaps we should pass in a concrete entry for this ?)
3279
2978
            Basename is returned as a utf8 string because we expect this
3280
2979
            tuple will be ignored, and don't want to take the time to
3281
2980
            decode.
3282
 
        :return: (iter_changes_result, changed). If the entry has not been
3283
 
            handled then changed is None. Otherwise it is False if no content
3284
 
            or metadata changes have occurred, and True if any content or
3285
 
            metadata change has occurred. If self.include_unchanged is True then
3286
 
            if changed is not None, iter_changes_result will always be a result
3287
 
            tuple. Otherwise, iter_changes_result is None unless changed is
3288
 
            True.
 
2981
        :return: None if these don't match
 
2982
                 A tuple of information about the change, or
 
2983
                 the object 'uninteresting' if these match, but are
 
2984
                 basically identical.
3289
2985
        """
3290
2986
        if self.source_index is None:
3291
2987
            source_details = DirState.NULL_PARENT_DETAILS
3359
3055
                    if source_minikind != 'f':
3360
3056
                        content_change = True
3361
3057
                    else:
3362
 
                        # Check the sha. We can't just rely on the size as
3363
 
                        # content filtering may mean differ sizes actually
3364
 
                        # map to the same content
3365
 
                        if link_or_sha1 is None:
3366
 
                            # Stat cache miss:
3367
 
                            statvalue, link_or_sha1 = \
3368
 
                                self.state._sha1_provider.stat_and_sha1(
3369
 
                                path_info[4])
3370
 
                            self.state._observed_sha1(entry, link_or_sha1,
3371
 
                                statvalue)
3372
 
                        content_change = (link_or_sha1 != source_details[1])
 
3058
                        # If the size is the same, check the sha:
 
3059
                        if target_details[2] == source_details[2]:
 
3060
                            if link_or_sha1 is None:
 
3061
                                # Stat cache miss:
 
3062
                                statvalue, link_or_sha1 = \
 
3063
                                    self.state._sha1_provider.stat_and_sha1(
 
3064
                                    path_info[4])
 
3065
                                self.state._observed_sha1(entry, link_or_sha1,
 
3066
                                    statvalue)
 
3067
                            content_change = (link_or_sha1 != source_details[1])
 
3068
                        else:
 
3069
                            # Size changed, so must be different
 
3070
                            content_change = True
3373
3071
                    # Target details is updated at update_entry time
3374
3072
                    if self.use_filesystem_for_exec:
3375
3073
                        # We don't need S_ISREG here, because we are sure
3390
3088
                        content_change = False
3391
3089
                    target_exec = False
3392
3090
                else:
3393
 
                    if path is None:
3394
 
                        path = pathjoin(old_dirname, old_basename)
3395
 
                    raise errors.BadFileKindError(path, path_info[2])
 
3091
                    raise Exception, "unknown kind %s" % path_info[2]
3396
3092
            if source_minikind == 'd':
3397
3093
                if path is None:
3398
3094
                    old_path = path = pathjoin(old_dirname, old_basename)
3399
3095
                self.old_dirname_to_file_id[old_path] = file_id
3400
3096
            # parent id is the entry for the path in the target tree
3401
 
            if old_basename and old_dirname == self.last_source_parent[0]:
 
3097
            if old_dirname == self.last_source_parent[0]:
3402
3098
                source_parent_id = self.last_source_parent[1]
3403
3099
            else:
3404
3100
                try:
3414
3110
                    self.last_source_parent[0] = old_dirname
3415
3111
                    self.last_source_parent[1] = source_parent_id
3416
3112
            new_dirname = entry[0][0]
3417
 
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
 
3113
            if new_dirname == self.last_target_parent[0]:
3418
3114
                target_parent_id = self.last_target_parent[1]
3419
3115
            else:
3420
3116
                try:
3437
3133
                    self.last_target_parent[1] = target_parent_id
3438
3134
 
3439
3135
            source_exec = source_details[3]
3440
 
            changed = (content_change
 
3136
            if (self.include_unchanged
 
3137
                or content_change
3441
3138
                or source_parent_id != target_parent_id
3442
3139
                or old_basename != entry[0][1]
3443
3140
                or source_exec != target_exec
3444
 
                )
3445
 
            if not changed and not self.include_unchanged:
3446
 
                return None, False
3447
 
            else:
 
3141
                ):
3448
3142
                if old_path is None:
3449
3143
                    old_path = path = pathjoin(old_dirname, old_basename)
3450
3144
                    old_path_u = self.utf8_decode(old_path)[0]
3463
3157
                       (source_parent_id, target_parent_id),
3464
3158
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
3465
3159
                       (source_kind, target_kind),
3466
 
                       (source_exec, target_exec)), changed
 
3160
                       (source_exec, target_exec))
 
3161
            else:
 
3162
                return self.uninteresting
3467
3163
        elif source_minikind in 'a' and target_minikind in 'fdlt':
3468
3164
            # looks like a new file
3469
3165
            path = pathjoin(entry[0][0], entry[0][1])
3490
3186
                       (None, parent_id),
3491
3187
                       (None, self.utf8_decode(entry[0][1])[0]),
3492
3188
                       (None, path_info[2]),
3493
 
                       (None, target_exec)), True
 
3189
                       (None, target_exec))
3494
3190
            else:
3495
3191
                # Its a missing file, report it as such.
3496
3192
                return (entry[0][2],
3500
3196
                       (None, parent_id),
3501
3197
                       (None, self.utf8_decode(entry[0][1])[0]),
3502
3198
                       (None, None),
3503
 
                       (None, False)), True
 
3199
                       (None, False))
3504
3200
        elif source_minikind in 'fdlt' and target_minikind in 'a':
3505
3201
            # unversioned, possibly, or possibly not deleted: we dont care.
3506
3202
            # if its still on disk, *and* theres no other entry at this
3518
3214
                   (parent_id, None),
3519
3215
                   (self.utf8_decode(entry[0][1])[0], None),
3520
3216
                   (DirState._minikind_to_kind[source_minikind], None),
3521
 
                   (source_details[3], None)), True
 
3217
                   (source_details[3], None))
3522
3218
        elif source_minikind in 'fdlt' and target_minikind in 'r':
3523
3219
            # a rename; could be a true rename, or a rename inherited from
3524
3220
            # a renamed parent. TODO: handle this efficiently. Its not
3536
3232
                "source_minikind=%r, target_minikind=%r"
3537
3233
                % (source_minikind, target_minikind))
3538
3234
            ## import pdb;pdb.set_trace()
3539
 
        return None, None
 
3235
        return None
3540
3236
 
3541
3237
    def __iter__(self):
3542
3238
        return self
3543
3239
 
3544
 
    def _gather_result_for_consistency(self, result):
3545
 
        """Check a result we will yield to make sure we are consistent later.
3546
 
        
3547
 
        This gathers result's parents into a set to output later.
3548
 
 
3549
 
        :param result: A result tuple.
3550
 
        """
3551
 
        if not self.partial or not result[0]:
3552
 
            return
3553
 
        self.seen_ids.add(result[0])
3554
 
        new_path = result[1][1]
3555
 
        if new_path:
3556
 
            # Not the root and not a delete: queue up the parents of the path.
3557
 
            self.search_specific_file_parents.update(
3558
 
                osutils.parent_directories(new_path.encode('utf8')))
3559
 
            # Add the root directory which parent_directories does not
3560
 
            # provide.
3561
 
            self.search_specific_file_parents.add('')
3562
 
 
3563
3240
    def iter_changes(self):
3564
3241
        """Iterate over the changes."""
3565
3242
        utf8_decode = cache_utf8._utf8_decode
3566
3243
        _cmp_by_dirs = cmp_by_dirs
3567
3244
        _process_entry = self._process_entry
 
3245
        uninteresting = self.uninteresting
3568
3246
        search_specific_files = self.search_specific_files
3569
3247
        searched_specific_files = self.searched_specific_files
3570
3248
        splitpath = osutils.splitpath
3640
3318
                continue
3641
3319
            path_handled = False
3642
3320
            for entry in root_entries:
3643
 
                result, changed = _process_entry(entry, root_dir_info)
3644
 
                if changed is not None:
 
3321
                result = _process_entry(entry, root_dir_info)
 
3322
                if result is not None:
3645
3323
                    path_handled = True
3646
 
                    if changed:
3647
 
                        self._gather_result_for_consistency(result)
3648
 
                    if changed or self.include_unchanged:
 
3324
                    if result is not uninteresting:
3649
3325
                        yield result
3650
3326
            if self.want_unversioned and not path_handled and root_dir_info:
3651
3327
                new_executable = bool(
3761
3437
                        for current_entry in current_block[1]:
3762
3438
                            # entry referring to file not present on disk.
3763
3439
                            # advance the entry only, after processing.
3764
 
                            result, changed = _process_entry(current_entry, None)
3765
 
                            if changed is not None:
3766
 
                                if changed:
3767
 
                                    self._gather_result_for_consistency(result)
3768
 
                                if changed or self.include_unchanged:
 
3440
                            result = _process_entry(current_entry, None)
 
3441
                            if result is not None:
 
3442
                                if result is not uninteresting:
3769
3443
                                    yield result
3770
3444
                        block_index +=1
3771
3445
                        if (block_index < len(self.state._dirblocks) and
3801
3475
                        pass
3802
3476
                    elif current_path_info is None:
3803
3477
                        # no path is fine: the per entry code will handle it.
3804
 
                        result, changed = _process_entry(current_entry, current_path_info)
3805
 
                        if changed is not None:
3806
 
                            if changed:
3807
 
                                self._gather_result_for_consistency(result)
3808
 
                            if changed or self.include_unchanged:
 
3478
                        result = _process_entry(current_entry, current_path_info)
 
3479
                        if result is not None:
 
3480
                            if result is not uninteresting:
3809
3481
                                yield result
3810
3482
                    elif (current_entry[0][1] != current_path_info[1]
3811
3483
                          or current_entry[1][self.target_index][0] in 'ar'):
3824
3496
                        else:
3825
3497
                            # entry referring to file not present on disk.
3826
3498
                            # advance the entry only, after processing.
3827
 
                            result, changed = _process_entry(current_entry, None)
3828
 
                            if changed is not None:
3829
 
                                if changed:
3830
 
                                    self._gather_result_for_consistency(result)
3831
 
                                if changed or self.include_unchanged:
 
3499
                            result = _process_entry(current_entry, None)
 
3500
                            if result is not None:
 
3501
                                if result is not uninteresting:
3832
3502
                                    yield result
3833
3503
                            advance_path = False
3834
3504
                    else:
3835
 
                        result, changed = _process_entry(current_entry, current_path_info)
3836
 
                        if changed is not None:
 
3505
                        result = _process_entry(current_entry, current_path_info)
 
3506
                        if result is not None:
3837
3507
                            path_handled = True
3838
 
                            if changed:
3839
 
                                self._gather_result_for_consistency(result)
3840
 
                            if changed or self.include_unchanged:
 
3508
                            if result is not uninteresting:
3841
3509
                                yield result
3842
3510
                    if advance_entry and current_entry is not None:
3843
3511
                        entry_index += 1
3902
3570
                        current_dir_info = dir_iterator.next()
3903
3571
                    except StopIteration:
3904
3572
                        current_dir_info = None
3905
 
        for result in self._iter_specific_file_parents():
3906
 
            yield result
3907
 
 
3908
 
    def _iter_specific_file_parents(self):
3909
 
        """Iter over the specific file parents."""
3910
 
        while self.search_specific_file_parents:
3911
 
            # Process the parent directories for the paths we were iterating.
3912
 
            # Even in extremely large trees this should be modest, so currently
3913
 
            # no attempt is made to optimise.
3914
 
            path_utf8 = self.search_specific_file_parents.pop()
3915
 
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
3916
 
                # We've examined this path.
3917
 
                continue
3918
 
            if path_utf8 in self.searched_exact_paths:
3919
 
                # We've examined this path.
3920
 
                continue
3921
 
            path_entries = self.state._entries_for_path(path_utf8)
3922
 
            # We need either one or two entries. If the path in
3923
 
            # self.target_index has moved (so the entry in source_index is in
3924
 
            # 'ar') then we need to also look for the entry for this path in
3925
 
            # self.source_index, to output the appropriate delete-or-rename.
3926
 
            selected_entries = []
3927
 
            found_item = False
3928
 
            for candidate_entry in path_entries:
3929
 
                # Find entries present in target at this path:
3930
 
                if candidate_entry[1][self.target_index][0] not in 'ar':
3931
 
                    found_item = True
3932
 
                    selected_entries.append(candidate_entry)
3933
 
                # Find entries present in source at this path:
3934
 
                elif (self.source_index is not None and
3935
 
                    candidate_entry[1][self.source_index][0] not in 'ar'):
3936
 
                    found_item = True
3937
 
                    if candidate_entry[1][self.target_index][0] == 'a':
3938
 
                        # Deleted, emit it here.
3939
 
                        selected_entries.append(candidate_entry)
3940
 
                    else:
3941
 
                        # renamed, emit it when we process the directory it
3942
 
                        # ended up at.
3943
 
                        self.search_specific_file_parents.add(
3944
 
                            candidate_entry[1][self.target_index][1])
3945
 
            if not found_item:
3946
 
                raise AssertionError(
3947
 
                    "Missing entry for specific path parent %r, %r" % (
3948
 
                    path_utf8, path_entries))
3949
 
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
3950
 
            for entry in selected_entries:
3951
 
                if entry[0][2] in self.seen_ids:
3952
 
                    continue
3953
 
                result, changed = self._process_entry(entry, path_info)
3954
 
                if changed is None:
3955
 
                    raise AssertionError(
3956
 
                        "Got entry<->path mismatch for specific path "
3957
 
                        "%r entry %r path_info %r " % (
3958
 
                        path_utf8, entry, path_info))
3959
 
                # Only include changes - we're outside the users requested
3960
 
                # expansion.
3961
 
                if changed:
3962
 
                    self._gather_result_for_consistency(result)
3963
 
                    if (result[6][0] == 'directory' and
3964
 
                        result[6][1] != 'directory'):
3965
 
                        # This stopped being a directory, the old children have
3966
 
                        # to be included.
3967
 
                        if entry[1][self.source_index][0] == 'r':
3968
 
                            # renamed, take the source path
3969
 
                            entry_path_utf8 = entry[1][self.source_index][1]
3970
 
                        else:
3971
 
                            entry_path_utf8 = path_utf8
3972
 
                        initial_key = (entry_path_utf8, '', '')
3973
 
                        block_index, _ = self.state._find_block_index_from_key(
3974
 
                            initial_key)
3975
 
                        if block_index == 0:
3976
 
                            # The children of the root are in block index 1.
3977
 
                            block_index +=1
3978
 
                        current_block = None
3979
 
                        if block_index < len(self.state._dirblocks):
3980
 
                            current_block = self.state._dirblocks[block_index]
3981
 
                            if not osutils.is_inside(
3982
 
                                entry_path_utf8, current_block[0]):
3983
 
                                # No entries for this directory at all.
3984
 
                                current_block = None
3985
 
                        if current_block is not None:
3986
 
                            for entry in current_block[1]:
3987
 
                                if entry[1][self.source_index][0] in 'ar':
3988
 
                                    # Not in the source tree, so doesn't have to be
3989
 
                                    # included.
3990
 
                                    continue
3991
 
                                # Path of the entry itself.
3992
 
 
3993
 
                                self.search_specific_file_parents.add(
3994
 
                                    osutils.pathjoin(*entry[0][:2]))
3995
 
                if changed or self.include_unchanged:
3996
 
                    yield result
3997
 
            self.searched_exact_paths.add(path_utf8)
3998
 
 
3999
 
    def _path_info(self, utf8_path, unicode_path):
4000
 
        """Generate path_info for unicode_path.
4001
 
 
4002
 
        :return: None if unicode_path does not exist, or a path_info tuple.
4003
 
        """
4004
 
        abspath = self.tree.abspath(unicode_path)
4005
 
        try:
4006
 
            stat = os.lstat(abspath)
4007
 
        except OSError, e:
4008
 
            if e.errno == errno.ENOENT:
4009
 
                # the path does not exist.
4010
 
                return None
4011
 
            else:
4012
 
                raise
4013
 
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
4014
 
        dir_info = (utf8_path, utf8_basename,
4015
 
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
4016
 
            abspath)
4017
 
        if dir_info[2] == 'directory':
4018
 
            if self.tree._directory_is_tree_reference(
4019
 
                unicode_path):
4020
 
                self.root_dir_info = self.root_dir_info[:2] + \
4021
 
                    ('tree-reference',) + self.root_dir_info[3:]
4022
 
        return dir_info
 
3573
_process_entry = ProcessEntryPython
4023
3574
 
4024
3575
 
4025
3576
# Try to load the compiled form if possible
4026
3577
try:
4027
 
    from bzrlib._dirstate_helpers_pyx import (
4028
 
        _read_dirblocks,
4029
 
        bisect_dirblock,
4030
 
        _bisect_path_left,
4031
 
        _bisect_path_right,
4032
 
        cmp_by_dirs,
 
3578
    from bzrlib._dirstate_helpers_c import (
 
3579
        _read_dirblocks_c as _read_dirblocks,
 
3580
        bisect_dirblock_c as bisect_dirblock,
 
3581
        _bisect_path_left_c as _bisect_path_left,
 
3582
        _bisect_path_right_c as _bisect_path_right,
 
3583
        cmp_by_dirs_c as cmp_by_dirs,
4033
3584
        ProcessEntryC as _process_entry,
4034
3585
        update_entry as update_entry,
4035
3586
        )
4036
 
except ImportError, e:
4037
 
    osutils.failed_to_load_extension(e)
 
3587
except ImportError:
4038
3588
    from bzrlib._dirstate_helpers_py import (
4039
 
        _read_dirblocks,
4040
 
        bisect_dirblock,
4041
 
        _bisect_path_left,
4042
 
        _bisect_path_right,
4043
 
        cmp_by_dirs,
 
3589
        _read_dirblocks_py as _read_dirblocks,
 
3590
        bisect_dirblock_py as bisect_dirblock,
 
3591
        _bisect_path_left_py as _bisect_path_left,
 
3592
        _bisect_path_right_py as _bisect_path_right,
 
3593
        cmp_by_dirs_py as cmp_by_dirs,
4044
3594
        )
4045
 
    # FIXME: It would be nice to be able to track moved lines so that the
4046
 
    # corresponding python code can be moved to the _dirstate_helpers_py
4047
 
    # module. I don't want to break the history for this important piece of
4048
 
    # code so I left the code here -- vila 20090622
4049
 
    update_entry = py_update_entry
4050
 
    _process_entry = ProcessEntryPython